-
Notifications
You must be signed in to change notification settings - Fork 17
/
chars-disallow.js
executable file
·7202 lines (6959 loc) · 341 KB
/
chars-disallow.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
export default [
// conventions:
// - remove invisibles
// - remove punctuation
// - remove vocalization annotations
// - choose heavy variants if available
// - collapse similar items to one if possible
// unclear:
// - left-right mirrors
// zero width
0x200C, // () ZERO WIDTH NON-JOINER
0x200D, // () ZERO WIDTH JOINER
// alternative stops
// reason: string split on "." w/o normalize, easy scan for ".eth"
// https://discuss.ens.domains/t/ens-name-normalization/8652/6
// see: examples.js => replace_sloppy()
0x3002, // (。) IDEOGRAPHIC FULL STOP
0xFF0E, // (.) FULLWIDTH FULL STOP
0xFF61, // (。) HALFWIDTH IDEOGRAPHIC FULL STOP
// scripted stops
0xFE52, // (﹒) SMALL FULL STOP
0x589, // (։) ARMENIAN FULL STOP
0x61D, // (؝) ARABIC END OF TEXT MARK
0x61E, // (؞) ARABIC TRIPLE DOT PUNCTUATION MARK
0x6D4, // (۔) ARABIC FULL STOP
0x700, // (܀) SYRIAC END OF PARAGRAPH
0x701, // (܁) SYRIAC SUPRALINEAR FULL STOP
0x702, // (܂) SYRIAC SUBLINEAR FULL STOP
0x964, // (।) DEVANAGARI DANDA (https://en.wikipedia.org/wiki/Danda)
0x1362, // (።) ETHIOPIC FULL STOP
0x166E, // (᙮) CANADIAN SYLLABICS FULL STOP
0x1803, // (᠃) MONGOLIAN FULL STOP
0x1809, // (᠉) MONGOLIAN MANCHU FULL STOP
0x2CF9, // (⳹) COPTIC OLD NUBIAN FULL STOP
0x2CFE, // (⳾) COPTIC FULL STOP
0x2E3C, // (⸼) STENOGRAPHIC FULL STOP
0xA4FF, // (꓿) LISU PUNCTUATION FULL STOP
0xA60E, // (꘎) VAI FULL STOP
0xA6F3, // (꛳) BAMUM FULL STOP
0x16AF5, // (𖫵) BASSA VAH FULL STOP
0x16E98, // (𖺘) MEDEFAIDRIN FULL STOP
0x1BC9F, // (𛲟) DUPLOYAN PUNCTUATION CHINOOK FULL STOP
0x740, // (◌݀) SYRIAC FEMININE DOT (added: 20221125)
0xA4F8, // (ꓸ) LISU LETTER TONE MYA TI
0xA4F9, // (ꓹ) LISU LETTER TONE NA PO
0xA4FA, // (ꓺ) LISU LETTER TONE MYA CYA
0xA4FB, // (ꓻ) LISU LETTER TONE MYA BO
0xA4FC, // (ꓼ) LISU LETTER TONE MYA NA
0xA4FD, // (ꓽ) LISU LETTER TONE MYA JEU
// exclaim/question mark
0xA1, // (¡) INVERTED EXCLAMATION MARK
0xBF, // (¿) INVERTED QUESTION MARK
0x55E, // (՞) ARMENIAN QUESTION MARK
0x61F, // (؟) ARABIC QUESTION MARK
0x1945, // (᥅) LIMBU QUESTION MARK
0x203D, // (‽) INTERROBANG
0x2E2E, // (⸮) REVERSED QUESTION MARK
0x2E53, // (⹓) MEDIEVAL EXCLAMATION MARK
0x2E54, // (⹔) MEDIEVAL QUESTION MARK
0x2762, // (❢) HEAVY EXCLAMATION MARK ORNAMENT
0xA60F, // (꘏) VAI QUESTION MARK
0xA6F7, // (꛷) BAMUM QUESTION MARK
0x2E18, // (⸘) INVERTED INTERROBANG
0x11143, // (𑅃) CHAKMA QUESTION MARK
0x1E95F, // (𞥟) ADLAM INITIAL QUESTION MARK
0x1F679, // (🙹) HEAVY INTERROBANG ORNAMENT
0x1F67A, // (🙺) SANS-SERIF INTERROBANG ORNAMENT
0x1F67B, // (🙻) HEAVY SANS-SERIF INTERROBANG ORNAMENT
0x7F9, // (߹) NKO EXCLAMATION MARK
0x1E95E, // (𞥞) ADLAM INITIAL EXCLAMATION MARK
0x1944, // (᥄) LIMBU EXCLAMATION MARK
0x1945, // (᥅) LIMBU QUESTION MARK
0xFE56, // (﹖) SMALL QUESTION MARK
0xFE57, // (﹗) SMALL EXCLAMATION MARK
0xFF01, // (!) FULLWIDTH EXCLAMATION MARK
0xFF1F, // (?) FULLWIDTH QUESTION MARK
//0x2047, // (⁇) DOUBLE QUESTION MARK
//0x2049, // (⁉) EXCLAMATION QUESTION MARK
0x1FBC4, // (🯄) NEGATIVE SQUARED QUESTION MARK
0x1F18A, // (🆊) CROSSED NEGATIVE SQUARED LATIN CAPITAL LETTER P
// 20230220: moved from chars-mapped.js
// note: IDNA maps the non-negated versions:
// "2460 ; mapped ; 0031 # 1.1 CIRCLED DIGIT ONE
// "2473 ; mapped ; 0032 0030 # 1.1 CIRCLED NUMBER TWENTY
// "24B6 ; mapped ; 0061 # 1.1 CIRCLED LATIN CAPITAL LETTER A"
// "24CF ; mapped ; 007A # 1.1 CIRCLED LATIN CAPITAL LETTER Z"
// negative circled
0x24FF, // (⓿) NEGATIVE CIRCLED DIGIT ZERO
0x24EB, // (⓫) NEGATIVE CIRCLED NUMBER ELEVEN
0x24EC, // (⓬) NEGATIVE CIRCLED NUMBER TWELVE
0x24ED, // (⓭) NEGATIVE CIRCLED NUMBER THIRTEEN
0x24EE, // (⓮) NEGATIVE CIRCLED NUMBER FOURTEEN
0x24EF, // (⓯) NEGATIVE CIRCLED NUMBER FIFTEEN
0x24F0, // (⓰) NEGATIVE CIRCLED NUMBER SIXTEEN
0x24F1, // (⓱) NEGATIVE CIRCLED NUMBER SEVENTEEN
0x24F2, // (⓲) NEGATIVE CIRCLED NUMBER EIGHTEEN
0x24F3, // (⓳) NEGATIVE CIRCLED NUMBER NINETEEN
0x24F4, // (⓴) NEGATIVE CIRCLED NUMBER TWENTY
// double-circled
0x24F5, // (⓵) DOUBLE CIRCLED DIGIT ONE
0x24F6, // (⓶) DOUBLE CIRCLED DIGIT TWO
0x24F7, // (⓷) DOUBLE CIRCLED DIGIT THREE
0x24F8, // (⓸) DOUBLE CIRCLED DIGIT FOUR
0x24F9, // (⓹) DOUBLE CIRCLED DIGIT FIVE
0x24FA, // (⓺) DOUBLE CIRCLED DIGIT SIX
0x24FB, // (⓻) DOUBLE CIRCLED DIGIT SEVEN
0x24FC, // (⓼) DOUBLE CIRCLED DIGIT EIGHT
0x24FD, // (⓽) DOUBLE CIRCLED DIGIT NINE
0x24FE, // (⓾) DOUBLE CIRCLED NUMBER TEN
// negative circled
0x2776, // (❶) DINGBAT NEGATIVE CIRCLED DIGIT ONE
0x2777, // (❷) DINGBAT NEGATIVE CIRCLED DIGIT TWO
0x2778, // (❸) DINGBAT NEGATIVE CIRCLED DIGIT THREE
0x2779, // (❹) DINGBAT NEGATIVE CIRCLED DIGIT FOUR
0x277A, // (❺) DINGBAT NEGATIVE CIRCLED DIGIT FIVE
0x277B, // (❻) DINGBAT NEGATIVE CIRCLED DIGIT SIX
0x277C, // (❼) DINGBAT NEGATIVE CIRCLED DIGIT SEVEN
0x277D, // (❽) DINGBAT NEGATIVE CIRCLED DIGIT EIGHT
0x277E, // (❾) DINGBAT NEGATIVE CIRCLED DIGIT NINE
0x277F, // (❿) DINGBAT NEGATIVE CIRCLED NUMBER TEN
// circled sans-serif
0x1F10B,// (🄋) DINGBAT CIRCLED SANS-SERIF DIGIT ZERO
0x2780, // (➀) DINGBAT CIRCLED SANS-SERIF DIGIT ONE
0x2781, // (➁) DINGBAT CIRCLED SANS-SERIF DIGIT TWO
0x2782, // (➂) DINGBAT CIRCLED SANS-SERIF DIGIT THREE
0x2783, // (➃) DINGBAT CIRCLED SANS-SERIF DIGIT FOUR
0x2784, // (➄) DINGBAT CIRCLED SANS-SERIF DIGIT FIVE
0x2785, // (➅) DINGBAT CIRCLED SANS-SERIF DIGIT SIX
0x2786, // (➆) DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN
0x2787, // (➇) DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT
0x2788, // (➈) DINGBAT CIRCLED SANS-SERIF DIGIT NINE
0x2789, // (➉) DINGBAT CIRCLED SANS-SERIF NUMBER TEN
// negative circled sans-serif
0x1F10C,// (🄌) DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO
0x278A, // (➊) DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE
0x278B, // (➋) DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO
0x278C, // (➌) DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE
0x278D, // (➍) DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR
0x278E, // (➎) DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE
0x278F, // (➏) DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX
0x2790, // (➐) DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN
0x2791, // (➑) DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT
0x2792, // (➒) DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE
0x2793, // (➓) DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN
// negative circled => a-z
0x1F150, // (🅐) NEGATIVE CIRCLED LATIN CAPITAL LETTER A
0x1F151, // (🅑) NEGATIVE CIRCLED LATIN CAPITAL LETTER B
0x1F152, // (🅒) NEGATIVE CIRCLED LATIN CAPITAL LETTER C
0x1F153, // (🅓) NEGATIVE CIRCLED LATIN CAPITAL LETTER D
0x1F154, // (🅔) NEGATIVE CIRCLED LATIN CAPITAL LETTER E
0x1F155, // (🅕) NEGATIVE CIRCLED LATIN CAPITAL LETTER F
0x1F156, // (🅖) NEGATIVE CIRCLED LATIN CAPITAL LETTER G
0x1F157, // (🅗) NEGATIVE CIRCLED LATIN CAPITAL LETTER H
0x1F158, // (🅘) NEGATIVE CIRCLED LATIN CAPITAL LETTER I
0x1F159, // (🅙) NEGATIVE CIRCLED LATIN CAPITAL LETTER J
0x1F15A, // (🅚) NEGATIVE CIRCLED LATIN CAPITAL LETTER K
0x1F15B, // (🅛) NEGATIVE CIRCLED LATIN CAPITAL LETTER L
0x1F15C, // (🅜) NEGATIVE CIRCLED LATIN CAPITAL LETTER M
0x1F15D, // (🅝) NEGATIVE CIRCLED LATIN CAPITAL LETTER N
0x1F15E, // (🅞) NEGATIVE CIRCLED LATIN CAPITAL LETTER O
0x1F15F, // (🅟) NEGATIVE CIRCLED LATIN CAPITAL LETTER P
0x1F160, // (🅠) NEGATIVE CIRCLED LATIN CAPITAL LETTER Q
0x1F161, // (🅡) NEGATIVE CIRCLED LATIN CAPITAL LETTER R
0x1F162, // (🅢) NEGATIVE CIRCLED LATIN CAPITAL LETTER S
0x1F163, // (🅣) NEGATIVE CIRCLED LATIN CAPITAL LETTER T
0x1F164, // (🅤) NEGATIVE CIRCLED LATIN CAPITAL LETTER U
0x1F165, // (🅥) NEGATIVE CIRCLED LATIN CAPITAL LETTER V
0x1F166, // (🅦) NEGATIVE CIRCLED LATIN CAPITAL LETTER W
0x1F167, // (🅧) NEGATIVE CIRCLED LATIN CAPITAL LETTER X
0x1F168, // (🅨) NEGATIVE CIRCLED LATIN CAPITAL LETTER Y
0x1F169, // (🅩) NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
// sectioning
0xA7, // (§) SECTION SIGN
0x2E39, // (⸹) TOP HALF SECTION SIGN
0xB6, // (¶) PILCROW SIGN
0x2E3F, // (⸿) CAPITULUM
0x204B, // (⁋) REVERSED PILCROW SIGN
0x2761, // (❡) CURVED STEM PARAGRAPH SIGN ORNAMENT
0x2E4D, // (⹍) PARAGRAPHUS MARK
0xA74D, // (ꝍ) LATIN SMALL LETTER O WITH LOOP
0xA753, // (ꝓ) LATIN SMALL LETTER P WITH FLOURISH
0xA755, // (ꝕ) LATIN SMALL LETTER P WITH SQUIRREL TAIL
0xA75B, // (ꝛ) LATIN SMALL LETTER R ROTUNDA
0xA75D, // (ꝝ) LATIN SMALL LETTER RUM ROTUNDA
0xA769, // (ꝩ) LATIN SMALL LETTER VEND
0xA76D, // (ꝭ) LATIN SMALL LETTER IS
0xA771, // (ꝱ) LATIN SMALL LETTER DUM
0xA772, // (ꝲ) LATIN SMALL LETTER LUM
0xA773, // (ꝳ) LATIN SMALL LETTER MUM
0xA774, // (ꝴ) LATIN SMALL LETTER NUM
0xA775, // (ꝵ) LATIN SMALL LETTER RUM
0xA777, // (ꝷ) LATIN SMALL LETTER TUM
0xA778, // (ꝸ) LATIN SMALL LETTER UM
0xA77A, // (ꝺ) LATIN SMALL LETTER INSULAR D
0xA77C, // (ꝼ) LATIN SMALL LETTER INSULAR F
0xA783, // (ꞃ) LATIN SMALL LETTER INSULAR R
0xA785, // (ꞅ) LATIN SMALL LETTER INSULAR S
0xA787, // (ꞇ) LATIN SMALL LETTER INSULAR T
0xA797, // (ꞗ) LATIN SMALL LETTER B WITH FLOURISH
0xA79B, // (ꞛ) LATIN SMALL LETTER VOLAPUK AE
0xA79D, // (ꞝ) LATIN SMALL LETTER VOLAPUK OE
0xA79F, // (ꞟ) LATIN SMALL LETTER VOLAPUK UE
0xA7C1, // (ꟁ) LATIN SMALL LETTER OLD POLISH O
0xA7C3, // (ꟃ) LATIN SMALL LETTER ANGLICANA W
0xA7D1, // (ꟑ) LATIN SMALL LETTER CLOSED INSULAR G
0xA7D7, // (ꟗ) LATIN SMALL LETTER MIDDLE SCOTS S
0xA7D9, // (ꟙ) LATIN SMALL LETTER SIGMOID S
// 0xDE, // (Þ) LATIN CAPITAL LETTER THORN (disallowed idna)
// 20230804: iceland uses this
// https://en.wikipedia.org/wiki/Thorn_(letter)
// node tools/reg-count.js DE === 0
0xFE, // (þ) LATIN SMALL LETTER THORN
0x1BF, // (ƿ) LATIN LETTER WYNN
0xA7D5, // (ꟕ) LATIN SMALL LETTER DOUBLE WYNN
0xF0, // (ð) LATIN SMALL LETTER ETH
0xA765, // (ꝥ) LATIN SMALL LETTER THORN WITH STROKE
0xA767, // (ꝧ) LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER
0xA7D3, // (ꟓ) LATIN SMALL LETTER DOUBLE THORN
// these just seem dangerous
0xAB35, // (ꬵ) LATIN SMALL LETTER LENIS F
0x131, // (ı) LATIN SMALL LETTER DOTLESS I
0x237, // (ȷ) LATIN SMALL LETTER DOTLESS J
0x138, // (ĸ) LATIN SMALL LETTER KRA
0x266D, // (♭) MUSIC FLAT SIGN
// scuffed digits
// 2
0x2621, // (☡) CAUTION SIGN
0x1BB, // (ƻ) LATIN LETTER TWO WITH STROKE
0x218A, // (↊) TURNED DIGIT TWO
0x1D24, // (ᴤ) LATIN LETTER VOICED LARYNGEAL SPIRANT
// 3 (see: e)
0x21D, // (ȝ) LATIN SMALL LETTER YOGH
0x1B9, // (ƹ) LATIN SMALL LETTER EZH REVERSED
0x1BA, // (ƺ) LATIN SMALL LETTER EZH WITH TAIL
0x292, // (ʒ) LATIN SMALL LETTER EZH
0x293, // (ʓ) LATIN SMALL LETTER EZH WITH CURL
0x1EF, // (ǯ) LATIN SMALL LETTER EZH WITH CARON
0x1D23, // (ᴣ) LATIN LETTER SMALL CAPITAL EZH
0xA76B, // (ꝫ) LATIN SMALL LETTER ET
0xA763, // (ꝣ) LATIN SMALL LETTER VISIGOTHIC Z
0x218B, // (↋) TURNED DIGIT THREE
0xA72B, // (ꜫ) LATIN SMALL LETTER TRESILLO
0x25B, // (ɛ) LATIN SMALL LETTER OPEN E
0x25C, // (ɜ) LATIN SMALL LETTER REVERSED OPEN E
0x1D93, // (ᶓ) LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK
0x1D94, // (ᶔ) LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK
0x2125, // (℥) OUNCE SIGN
// 4
0xA72F, // (ꜯ) LATIN SMALL LETTER CUATRILLO WITH COMMA
0xA72D, // (ꜭ) LATIN SMALL LETTER CUATRILLO
// 6
0x1EFD, // (ỽ) LATIN SMALL LETTER MIDDLE-WELSH V
// 8
0x223, // (ȣ) LATIN SMALL LETTER OU
0x1D15, // (ᴕ) LATIN LETTER SMALL CAPITAL OU
// 9
0xA76F, // (ꝯ) LATIN SMALL LETTER CON
// digraphs
0x238, // (ȸ) LATIN SMALL LETTER DB DIGRAPH
0x239, // (ȹ) LATIN SMALL LETTER QP DIGRAPH
0x2A3, // (ʣ) LATIN SMALL LETTER DZ DIGRAPH
0x1C6, // (dž) LATIN SMALL LETTER DZ WITH CARON
0x2A4, // (ʤ) LATIN SMALL LETTER DEZH DIGRAPH
0x2A5, // (ʥ) LATIN SMALL LETTER DZ DIGRAPH WITH CURL
0x2A6, // (ʦ) LATIN SMALL LETTER TS DIGRAPH
0x2A8, // (ʨ) LATIN SMALL LETTER TC DIGRAPH WITH CURL
0x2AA, // (ʪ) LATIN SMALL LETTER LS DIGRAPH
0x2AB, // (ʫ) LATIN SMALL LETTER LZ DIGRAPH
0xAB66, // (ꭦ) LATIN SMALL LETTER DZ DIGRAPH WITH RETROFLEX HOOK
0xAB67, // (ꭧ) LATIN SMALL LETTER TS DIGRAPH WITH RETROFLEX HOOK
0x1DF12, // (𝼒) LATIN SMALL LETTER DEZH DIGRAPH WITH PALATAL HOOK
0x1DF19, // (𝼙) LATIN SMALL LETTER DEZH DIGRAPH WITH RETROFLEX HOOK
0x1A3, // (ƣ) LATIN SMALL LETTER OI
0x1E3, // (ǣ) LATIN SMALL LETTER AE WITH MACRON
0x1FD, // (ǽ) LATIN SMALL LETTER AE WITH ACUTE
0x26E, // (ɮ) LATIN SMALL LETTER LEZH
0x276, // (ɶ) LATIN LETTER SMALL CAPITAL OE
0x1D6B, // (ᵫ) LATIN SMALL LETTER UE
0x1D01, // (ᴁ) LATIN LETTER SMALL CAPITAL AE
0xAB50, // (ꭐ) LATIN SMALL LETTER UI
0xAB60, // (ꭠ) LATIN SMALL LETTER SAKHA YAT
0xAB61, // (ꭡ) LATIN SMALL LETTER IOTIFIED E
0xAB62, // (ꭢ) LATIN SMALL LETTER OPEN OE
0xAB63, // (ꭣ) LATIN SMALL LETTER UO
0x1F670, // (🙰) SCRIPT LIGATURE ET ORNAMENT
0x1F671, // (🙱) HEAVY SCRIPT LIGATURE ET ORNAMENT
0xA729, // (ꜩ) LATIN SMALL LETTER TZ
0xA733, // (ꜳ) LATIN SMALL LETTER AA
0xA735, // (ꜵ) LATIN SMALL LETTER AO
0xA737, // (ꜷ) LATIN SMALL LETTER AU
0xA739, // (ꜹ) LATIN SMALL LETTER AV
0xA73B, // (ꜻ) LATIN SMALL LETTER AV WITH HORIZONTAL BAR
0xA73D, // (ꜽ) LATIN SMALL LETTER AY
0xA74F, // (ꝏ) LATIN SMALL LETTER OO
0xA761, // (ꝡ) LATIN SMALL LETTER VY
//0x133, // (ij) LATIN SMALL LIGATURE IJ
0x153, // (œ) LATIN SMALL LIGATURE OE
0xAB40, // (ꭀ) LATIN SMALL LETTER INVERTED OE
//0xFB00, // (ff) LATIN SMALL LIGATURE FF => [66 66]
//0xFB01, // (fi) LATIN SMALL LIGATURE FI => [66 69]
//0xFB02, // (fl) LATIN SMALL LIGATURE FL => [66 6C]
//0xFB03, // (ffi) LATIN SMALL LIGATURE FFI => [66 66 69]
//0xFB04, // (ffl) LATIN SMALL LIGATURE FFL => [66 66 6C]
0xFB05, // (ſt) LATIN SMALL LIGATURE LONG S T => [73 74]
0xFB06, // (st) LATIN SMALL LIGATURE ST => [73 74]
0x195, // (ƕ) LATIN SMALL LETTER HV
0x2114, // (℔) L B BAR SYMBOL
0x1EFB, // (ỻ) LATIN SMALL LETTER MIDDLE-WELSH LL
// epigraphic
0xA7F7, // (ꟷ) LATIN EPIGRAPHIC LETTER SIDEWAYS I
0xA7FB, // (ꟻ) LATIN EPIGRAPHIC LETTER REVERSED F
0xA7FC, // (ꟼ) LATIN EPIGRAPHIC LETTER REVERSED P
0xA7FD, // (ꟽ) LATIN EPIGRAPHIC LETTER INVERTED M
0xA7FE, // (ꟾ) LATIN EPIGRAPHIC LETTER I LONGA
0xA7FF, // (ꟿ) LATIN EPIGRAPHIC LETTER ARCHAIC M
0x214C, // (⅌) PER SIGN
0x2123, // (℣) VERSICLE
0x2108, // (℈) SCRUPLE
0x214D, // (⅍) AKTIESELSKAB
0x214A, // (⅊) PROPERTY LINE
0x2104, // (℄) CENTRE LINE SYMBOL
0x211F, // (℟) RESPONSE
0x1F545, // (🕅) SYMBOL FOR MARKS CHAPTER
0x1F5DA, // (🗚) INCREASE FONT SIZE SYMBOL
0x1F5DB, // (🗛) DECREASE FONT SIZE SYMBOL
// currency (dead)
0x20AF, // (₯) DRACHMA SIGN
0x20A0, // (₠) EURO-CURRENCY SIGN
0x20A2, // (₢) CRUZEIRO SIGN
0x20A3, // (₣) FRENCH FRANC SIGN
0x20A4, // (₤) LIRA SIGN
0x20A5, // (₥) MILL SIGN
0x20A7, // (₧) PESETA SIGN
0x20B0, // (₰) GERMAN PENNY SIGN
0x20B3, // (₳) AUSTRAL SIGN
0x20B6, // (₶) LIVRE TOURNOIS SIGN
0x20B7, // (₷) SPESMILO SIGN
0x20BB, // (₻) NORDIC MARK SIGN
// hash-like
0x266E, // (♮) MUSIC NATURAL SIGN
0x266F, // (♯) MUSIC SHARP SIGN
0x232D, // (⌭) CYLINDRICITY
0x2317, // (⌗) VIEWDATA SQUARE
// weird shit
0xA723, // (ꜣ) LATIN SMALL LETTER EGYPTOLOGICAL ALEF
0xA725,// (ꜥ) LATIN SMALL LETTER EGYPTOLOGICAL AIN
0xA78C, // (ꞌ) LATIN SMALL LETTER SALTILLO
0xA78F, // (ꞏ) LATIN LETTER SINOLOGICAL DOT
// with stroke
0x2C65, // (ⱥ) LATIN SMALL LETTER A WITH STROKE
0x180, // (ƀ) LATIN SMALL LETTER B WITH STROKE
0x23C, // (ȼ) LATIN SMALL LETTER C WITH STROKE
0x111, // (đ) LATIN SMALL LETTER D WITH STROKE
0x247, // (ɇ) LATIN SMALL LETTER E WITH STROKE
0xA799, // (ꞙ) LATIN SMALL LETTER F WITH STROKE
0x1E5, // (ǥ) LATIN SMALL LETTER G WITH STROKE
0x127, // (ħ) LATIN SMALL LETTER H WITH STROKE
0x268, // (ɨ) LATIN SMALL LETTER I WITH STROKE
0x249, // (ɉ) LATIN SMALL LETTER J WITH STROKE
0xA741, // (ꝁ) LATIN SMALL LETTER K WITH STROKE
//0x142, // (ł) LATIN SMALL LETTER L WITH STROKE (20221114: most frequent polish, kinda dangerous)
///0xF8, // (ø) LATIN SMALL LETTER O WITH STROKE (20221030: changed for norwegian/danish)
0x1D7D, // (ᵽ) LATIN SMALL LETTER P WITH STROKE
0x24D, // (ɍ) LATIN SMALL LETTER R WITH STROKE
0x167, // (ŧ) LATIN SMALL LETTER T WITH STROKE
0xA7B9, // (ꞹ) LATIN SMALL LETTER U WITH STROKE
0x24F, // (ɏ) LATIN SMALL LETTER Y WITH STROKE
0x1B6, // (ƶ) LATIN SMALL LETTER Z WITH STROKE
// weird strokes
0xA7C8, // (ꟈ) LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY
0xA7A1, // (ꞡ) LATIN SMALL LETTER G WITH OBLIQUE STROKE
0x1DF1A, // (𝼚) LATIN SMALL LETTER I WITH STROKE AND RETROFLEX HOOK
0xA7A3, // (ꞣ) LATIN SMALL LETTER K WITH OBLIQUE STROKE
0xA743, // (ꝃ) LATIN SMALL LETTER K WITH DIAGONAL STROKE
0xA745, // (ꝅ) LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE
0xA749, // (ꝉ) LATIN SMALL LETTER L WITH HIGH STROKE
0xA7A5, // (ꞥ) LATIN SMALL LETTER N WITH OBLIQUE STROKE
0xAB3F, // (ꬿ) LATIN SMALL LETTER OPEN O WITH STROKE
0x1FF, // (ǿ) LATIN SMALL LETTER O WITH STROKE AND ACUTE
0xA74B, // (ꝋ) LATIN SMALL LETTER O WITH LONG STROKE OVERLAY
0xA751, // (ꝑ) LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER
0xA757, // (ꝗ) LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER
0xA759, // (ꝙ) LATIN SMALL LETTER Q WITH DIAGONAL STROKE
0xA7A7, // (ꞧ) LATIN SMALL LETTER R WITH OBLIQUE STROKE
0xA7A9, // (ꞩ) LATIN SMALL LETTER S WITH OBLIQUE STROKE
0xA7CA, // (ꟊ) LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
0x2C66, // (ⱦ) LATIN SMALL LETTER T WITH DIAGONAL STROKE
0xA75F, // (ꝟ) LATIN SMALL LETTER V WITH DIAGONAL STROKE
// wtf strokes
0x1D7B, // (ᵻ) LATIN SMALL CAPITAL LETTER I WITH STROKE
0x1D7E, // (ᵾ) LATIN SMALL CAPITAL LETTER U WITH STROKE
0x1D7C, // (ᵼ) LATIN SMALL LETTER IOTA WITH STROKE
0x1D7F, // (ᵿ) LATIN SMALL LETTER UPSILON WITH STROKE
0x19B, // (ƛ) LATIN SMALL LETTER LAMBDA WITH STROKE
0x275, // (ɵ) LATIN SMALL LETTER BARRED O
0x289, // (ʉ) LATIN SMALL LETTER U BAR
0xA793, // (ꞓ) LATIN SMALL LETTER C WITH BAR
0x1D7A, // (ᵺ) LATIN SMALL LETTER TH WITH STRIKETHROUGH
// legs
0xAB4E, // (ꭎ) LATIN SMALL LETTER U WITH SHORT RIGHT LEG
0xAB4F, // (ꭏ) LATIN SMALL LETTER U BAR WITH SHORT RIGHT LEG
0xAB5A, // (ꭚ) LATIN SMALL LETTER Y WITH SHORT RIGHT LEG
0xAB56, // (ꭖ) LATIN SMALL LETTER X WITH LOW RIGHT RING
0xAB57, // (ꭗ) LATIN SMALL LETTER X WITH LONG LEFT LEG
0xAB58, // (ꭘ) LATIN SMALL LETTER X WITH LONG LEFT LEG AND LOW RIGHT RING
0xAB59, // (ꭙ) LATIN SMALL LETTER X WITH LONG LEFT LEG WITH SERIF
0x19E, // (ƞ) LATIN SMALL LETTER N WITH LONG RIGHT LEG
// tails
0x23F, // (ȿ) LATIN SMALL LETTER S WITH SWASH TAIL
0x240, // (ɀ) LATIN SMALL LETTER Z WITH SWASH TAIL
0x24B, // (ɋ) LATIN SMALL LETTER Q WITH HOOK TAIL
0x256, // (ɖ) LATIN SMALL LETTER D WITH TAIL
0x29D, // (ʝ) LATIN SMALL LETTER J WITH CROSSED-TAIL
0xAB3A, // (ꬺ) LATIN SMALL LETTER M WITH CROSSED-TAIL
0xAB3B, // (ꬻ) LATIN SMALL LETTER N WITH CROSSED-TAIL
// curls
0x255, // (ɕ) LATIN SMALL LETTER C WITH CURL
0x221, // (ȡ) LATIN SMALL LETTER D WITH CURL
0x234, // (ȴ) LATIN SMALL LETTER L WITH CURL
0x235, // (ȵ) LATIN SMALL LETTER N WITH CURL
0x1DF1E, // (𝼞) LATIN SMALL LETTER S WITH CURL
0x236, // (ȶ) LATIN SMALL LETTER T WITH CURL
0x2C74, // (ⱴ) LATIN SMALL LETTER V WITH CURL
0x291, // (ʑ) LATIN SMALL LETTER Z WITH CURL
0x1EFF, // (ỿ) LATIN SMALL LETTER Y WITH LOOP
0x1E9A, // (ẚ) LATIN SMALL LETTER A WITH RIGHT HALF RING
0x1E9C, // (ẜ) LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE
0x1E9D, // (ẝ) LATIN SMALL LETTER LONG S WITH HIGH STROKE
//0x1E9B, // (ẛ) LATIN SMALL LETTER LONG S WITH DOT ABOVE
// 20230829: changed in unicode 15.1
// now mapped to: DF (ß) LATIN SMALL LETTER SHARP S
//0x1E9E, // (ẞ) LATIN CAPITAL LETTER SHARP S
// what the fuck are these, latin greeks?
0x251, // (ɑ) LATIN SMALL LETTER ALPHA
0x1E9F, // (ẟ) LATIN SMALL LETTER DELTA
0x263, // (ɣ) LATIN SMALL LETTER GAMMA
0x278, // (ɸ) LATIN SMALL LETTER PHI
0x269, // (ɩ) LATIN SMALL LETTER IOTA
0x277, // (ɷ) LATIN SMALL LETTER CLOSED OMEGA
0x28A, // (ʊ) LATIN SMALL LETTER UPSILON
0x2C77, // (ⱷ) LATIN SMALL LETTER TAILLESS PHI
0xAB30, // (ꬰ) LATIN SMALL LETTER BARRED ALPHA
0xAB53, // (ꭓ) LATIN SMALL LETTER CHI
0xAB54, // (ꭔ) LATIN SMALL LETTER CHI WITH LOW RIGHT RING
0xAB55, // (ꭕ) LATIN SMALL LETTER CHI WITH LOW LEFT SERIF
0xAB64, // (ꭤ) LATIN SMALL LETTER INVERTED ALPHA
0xA7B5, // (ꞵ) LATIN SMALL LETTER BETA
0xA7B7, // (ꞷ) LATIN SMALL LETTER OMEGA
0x2180, // (ↀ) ROMAN NUMERAL ONE THOUSAND C D
0x2181, // (ↁ) ROMAN NUMERAL FIVE THOUSAND (cool character)
0x2182, // (ↂ) ROMAN NUMERAL TEN THOUSAND
0x2185, // (ↅ) ROMAN NUMERAL SIX LATE FORM
0x2186, // (ↆ) ROMAN NUMERAL FIFTY EARLY FORM
0x2187, // (ↇ) ROMAN NUMERAL FIFTY THOUSAND
0x2188, // (ↈ) ROMAN NUMERAL ONE HUNDRED THOUSAND
0x10190, // (𐆐) ROMAN SEXTANS SIGN
0x10191, // (𐆑) ROMAN UNCIA SIGN
0x10192, // (𐆒) ROMAN SEMUNCIA SIGN
0x10193, // (𐆓) ROMAN SEXTULA SIGN
0x10194, // (𐆔) ROMAN DIMIDIA SEXTULA SIGN
0x10195, // (𐆕) ROMAN SILIQUA SIGN
0x10196, // (𐆖) ROMAN DENARIUS SIGN
0x10197, // (𐆗) ROMAN QUINARIUS SIGN
0x10198, // (𐆘) ROMAN SESTERTIUS SIGN
0x10199, // (𐆙) ROMAN DUPONDIUS SIGN
0x1019A, // (𐆚) ROMAN AS SIGN
0x1019B, // (𐆛) ROMAN CENTURIAL SIGN
0x1019C, // (𐆜) ASCIA SYMBOL
// percussive
0x2AC, // (ʬ) LATIN LETTER BILABIAL PERCUSSIVE
0x2AD, // (ʭ) LATIN LETTER BIDENTAL PERCUSSIVE
// glottal
0x1BE, // (ƾ) LATIN LETTER INVERTED GLOTTAL STOP WITH STROKE
0x242, // (ɂ) LATIN SMALL LETTER GLOTTAL STOP
0x294, // (ʔ) LATIN LETTER GLOTTAL STOP
0x296, // (ʖ) LATIN LETTER INVERTED GLOTTAL STOP
0x2A1, // (ʡ) LATIN LETTER GLOTTAL STOP WITH STROKE
0x295, // (ʕ) LATIN LETTER PHARYNGEAL VOICED FRICATIVE
0x2A2, // (ʢ) LATIN LETTER REVERSED GLOTTAL STOP WITH STROKE
0x1DF0E, // (𝼎) LATIN LETTER INVERTED GLOTTAL STOP WITH CURL
0xA7BB, // (ꞻ) LATIN SMALL LETTER GLOTTAL A
0xA7BD, // (ꞽ) LATIN SMALL LETTER GLOTTAL I
0xA7BF, // (ꞿ) LATIN SMALL LETTER GLOTTAL U
0x97D, // (ॽ) DEVANAGARI LETTER GLOTTAL STOP
// stretched c
0x297, // (ʗ) LATIN LETTER STRETCHED C
0x1DF0F, // (𝼏) LATIN LETTER STRETCHED C WITH CURL
// half h
0x2C76, // (ⱶ) LATIN SMALL LETTER HALF H
0xA7F5, // (Ꟶ) LATIN CAPITAL LETTER REVERSED HALF H
0xA7F6, // (ꟶ) LATIN SMALL LETTER REVERSED HALF H
// with HOOK
0x188, // (ƈ) LATIN SMALL LETTER C WITH HOOK
0x192, // (ƒ) LATIN SMALL LETTER F WITH HOOK
0x199, // (ƙ) LATIN SMALL LETTER K WITH HOOK
0x1A5, // (ƥ) LATIN SMALL LETTER P WITH HOOK
0x1AD, // (ƭ) LATIN SMALL LETTER T WITH HOOK
0x1B4, // (ƴ) LATIN SMALL LETTER Y WITH HOOK
0x225, // (ȥ) LATIN SMALL LETTER Z WITH HOOK
0x253, // (ɓ) LATIN SMALL LETTER B WITH HOOK
0x257, // (ɗ) LATIN SMALL LETTER D WITH HOOK
0x260, // (ɠ) LATIN SMALL LETTER G WITH HOOK
0x266, // (ɦ) LATIN SMALL LETTER H WITH HOOK
0x271, // (ɱ) LATIN SMALL LETTER M WITH HOOK
0x282, // (ʂ) LATIN SMALL LETTER S WITH HOOK
0x28B, // (ʋ) LATIN SMALL LETTER V WITH HOOK
0x2A0, // (ʠ) LATIN SMALL LETTER Q WITH HOOK
0x1D91, // (ᶑ) LATIN SMALL LETTER D WITH HOOK AND TAIL
0x2C73, // (ⱳ) LATIN SMALL LETTER W WITH HOOK
// left hook
0x272, // (ɲ) LATIN SMALL LETTER N WITH LEFT HOOK
0xAB52, // (ꭒ) LATIN SMALL LETTER U WITH LEFT HOOK
0x1DF11, // (𝼑) LATIN SMALL LETTER L WITH FISHHOOK
0x1DF16, // (𝼖) LATIN SMALL LETTER R WITH FISHHOOK AND PALATAL HOOK
0x1DF25, // (𝼥) LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK
0x1DF26, // (𝼦) LATIN SMALL LETTER L WITH MID-HEIGHT LEFT HOOK
0x1DF27, // (𝼧) LATIN SMALL LETTER N WITH MID-HEIGHT LEFT HOOK
0x1DF28, // (𝼨) LATIN SMALL LETTER R WITH MID-HEIGHT LEFT HOOK
0x1DF29, // (𝼩) LATIN SMALL LETTER S WITH MID-HEIGHT LEFT HOOK
0x1DF2A, // (𝼪) LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK
// right hook
0x2C71, // (ⱱ) LATIN SMALL LETTER V WITH RIGHT HOOK
// retroflex hook
0x1D92, // (ᶒ) LATIN SMALL LETTER E WITH RETROFLEX HOOK
0x1D96, // (ᶖ) LATIN SMALL LETTER I WITH RETROFLEX HOOK
0x26D, // (ɭ) LATIN SMALL LETTER L WITH RETROFLEX HOOK
0x273, // (ɳ) LATIN SMALL LETTER N WITH RETROFLEX HOOK
0x288, // (ʈ) LATIN SMALL LETTER T WITH RETROFLEX HOOK
0x290, // (ʐ) LATIN SMALL LETTER Z WITH RETROFLEX HOOK
0x1D8F, // (ᶏ) LATIN SMALL LETTER A WITH RETROFLEX HOOK
0x1D90, // (ᶐ) LATIN SMALL LETTER ALPHA WITH RETROFLEX HOOK
0x1D97, // (ᶗ) LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK
0x1D99, // (ᶙ) LATIN SMALL LETTER U WITH RETROFLEX HOOK
0x1D9A, // (ᶚ) LATIN SMALL LETTER EZH WITH RETROFLEX HOOK
0xA78E, // (ꞎ) LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
0x1DF05, // (𝼅) LATIN SMALL LETTER LEZH WITH RETROFLEX HOOK
0x1DF0A, // (𝼊) LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK
0x1DF1B, // (𝼛) LATIN SMALL LETTER O WITH RETROFLEX HOOK
0x1DF1D, // (𝼝) LATIN SMALL LETTER C WITH RETROFLEX HOOK
0x1DF09, // (𝼉) LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK
// palatal hook
0x1AB, // (ƫ) LATIN SMALL LETTER T WITH PALATAL HOOK
0x1D80, // (ᶀ) LATIN SMALL LETTER B WITH PALATAL HOOK
0x1D81, // (ᶁ) LATIN SMALL LETTER D WITH PALATAL HOOK
0x1D82, // (ᶂ) LATIN SMALL LETTER F WITH PALATAL HOOK
0x1D83, // (ᶃ) LATIN SMALL LETTER G WITH PALATAL HOOK
0x1D84, // (ᶄ) LATIN SMALL LETTER K WITH PALATAL HOOK
0x1D85, // (ᶅ) LATIN SMALL LETTER L WITH PALATAL HOOK
0x1D86, // (ᶆ) LATIN SMALL LETTER M WITH PALATAL HOOK
0x1D87, // (ᶇ) LATIN SMALL LETTER N WITH PALATAL HOOK
0x1D88, // (ᶈ) LATIN SMALL LETTER P WITH PALATAL HOOK
0x1D8A, // (ᶊ) LATIN SMALL LETTER S WITH PALATAL HOOK
0x1D8C, // (ᶌ) LATIN SMALL LETTER V WITH PALATAL HOOK
0x1D8D, // (ᶍ) LATIN SMALL LETTER X WITH PALATAL HOOK
0x1D8E, // (ᶎ) LATIN SMALL LETTER Z WITH PALATAL HOOK
0xA794, // (ꞔ) LATIN SMALL LETTER C WITH PALATAL HOOK
0xA795, // (ꞕ) LATIN SMALL LETTER H WITH PALATAL HOOK
0x1DF18, // (𝼘) LATIN SMALL LETTER EZH WITH PALATAL HOOK
// with descender
0x2C68, // (ⱨ) LATIN SMALL LETTER H WITH DESCENDER
0x2C6A, // (ⱪ) LATIN SMALL LETTER K WITH DESCENDER
0x2C6C, // (ⱬ) LATIN SMALL LETTER Z WITH DESCENDER
0xA791, // (ꞑ) LATIN SMALL LETTER N WITH DESCENDER
// top bar
0x183, // (ƃ) LATIN SMALL LETTER B WITH TOPBAR
0x18C, // (ƌ) LATIN SMALL LETTER D WITH TOPBAR
// tones
0x185, // (ƅ) LATIN SMALL LETTER TONE SIX
0x1A8, // (ƨ) LATIN SMALL LETTER TONE TWO
0x1BD, // (ƽ) LATIN SMALL LETTER TONE FIVE
//0x135F, // (◌፟) ETHIOPIC COMBINING GEMINATION MARK
0x1363, // (፣) ETHIOPIC COMMA
0x1364, // (፤) ETHIOPIC SEMICOLON
0x1365, // (፥) ETHIOPIC COLON
0x1366, // (፦) ETHIOPIC PREFACE COLON
0x1367, // (፧) ETHIOPIC QUESTION MARK
0x1361, // (፡) ETHIOPIC WORDSPACE
/*
// 20240423: why weren't these disabled?
// `node tools/reg-count.js 1390..1399` shows 1 illegal reg
// TODO: disable for 16
0x1390, // (᎐) ETHIOPIC TONAL MARK YIZET
0x1391, // (᎑) ETHIOPIC TONAL MARK DERET
0x1392, // (᎒) ETHIOPIC TONAL MARK RIKRIK
0x1393, // (᎓) ETHIOPIC TONAL MARK SHORT RIKRIK
0x1394, // (᎔) ETHIOPIC TONAL MARK DIFAT
0x1395, // (᎕) ETHIOPIC TONAL MARK KENAT
0x1396, // (᎖) ETHIOPIC TONAL MARK CHIRET
0x1397, // (᎗) ETHIOPIC TONAL MARK HIDET
0x1398, // (᎘) ETHIOPIC TONAL MARK DERET-HIDET
0x1399, // (᎙) ETHIOPIC TONAL MARK KURT
*/
// with tilde
0x1D6C, // (ᵬ) LATIN SMALL LETTER B WITH MIDDLE TILDE
0x1D6D, // (ᵭ) LATIN SMALL LETTER D WITH MIDDLE TILDE
0x1D6E, // (ᵮ) LATIN SMALL LETTER F WITH MIDDLE TILDE
0x26B, // (ɫ) LATIN SMALL LETTER L WITH MIDDLE TILDE
0x1D6F, // (ᵯ) LATIN SMALL LETTER M WITH MIDDLE TILDE
0x1D70, // (ᵰ) LATIN SMALL LETTER N WITH MIDDLE TILDE
0x1D71, // (ᵱ) LATIN SMALL LETTER P WITH MIDDLE TILDE
0x1D74, // (ᵴ) LATIN SMALL LETTER S WITH MIDDLE TILDE
0x1D75, // (ᵵ) LATIN SMALL LETTER T WITH MIDDLE TILDE
0x1D76, // (ᵶ) LATIN SMALL LETTER Z WITH MIDDLE TILDE
// schwa
0x259, // (ə) LATIN SMALL LETTER SCHWA
0x25A, // (ɚ) LATIN SMALL LETTER SCHWA WITH HOOK
0x1D95, // (ᶕ) LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK
0xAB31, // (ꬱ) LATIN SMALL LETTER A REVERSED-SCHWA
// letter e
0x25D, // (ɝ) LATIN SMALL LETTER REVERSED OPEN E WITH HOOK
0x258, // (ɘ) LATIN SMALL LETTER REVERSED E
0x25E, // (ɞ) LATIN SMALL LETTER CLOSED REVERSED OPEN E
0x29A, // (ʚ) LATIN SMALL LETTER CLOSED OPEN E
0x2C78, // (ⱸ) LATIN SMALL LETTER E WITH NOTCH
0xAB33, // (ꬳ) LATIN SMALL LETTER BARRED E
0xAB34, // (ꬴ) LATIN SMALL LETTER E WITH FLOURISH
0x212E, // (℮) ESTIMATED SYMBOL
// letter l
0x19A, // (ƚ) LATIN SMALL LETTER L WITH BAR
0x2C61, // (ⱡ) LATIN SMALL LETTER L WITH DOUBLE BAR
0xAB37, // (ꬷ) LATIN SMALL LETTER L WITH INVERTED LAZY S
0xAB39, // (ꬹ) LATIN SMALL LETTER L WITH MIDDLE RING
0xAB38, // (ꬸ) LATIN SMALL LETTER L WITH DOUBLE MIDDLE TILDE
0xA747, // (ꝇ) LATIN SMALL LETTER BROKEN L
0x26C, // (ɬ) LATIN SMALL LETTER L WITH BELT
0x1DF04, // (𝼄) LATIN LETTER SMALL CAPITAL L WITH BELT
0x1DF13, // (𝼓) LATIN SMALL LETTER L WITH BELT AND PALATAL HOOK
// letter r
0x27C, // (ɼ) LATIN SMALL LETTER R WITH LONG LEG
0x27D, // (ɽ) LATIN SMALL LETTER R WITH TAIL
0x27E, // (ɾ) LATIN SMALL LETTER R WITH FISHHOOK
0x27F, // (ɿ) LATIN SMALL LETTER REVERSED R WITH FISHHOOK
0x281, // (ʁ) LATIN LETTER SMALL CAPITAL INVERTED R
0x1D89, // (ᶉ) LATIN SMALL LETTER R WITH PALATAL HOOK
0x1D72, // (ᵲ) LATIN SMALL LETTER R WITH MIDDLE TILDE
0x1D73, // (ᵳ) LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE
0x1D19, // (ᴙ) LATIN LETTER SMALL CAPITAL REVERSED R
0xAB46, // (ꭆ) LATIN LETTER SMALL CAPITAL R WITH RIGHT LEG
0xA776, // (ꝶ) LATIN LETTER SMALL CAPITAL RUM
0xAB45, // (ꭅ) LATIN SMALL LETTER STIRRUP R
0xAB47, // (ꭇ) LATIN SMALL LETTER R WITHOUT HANDLE
0xAB48, // (ꭈ) LATIN SMALL LETTER DOUBLE R
0xAB49, // (ꭉ) LATIN SMALL LETTER R WITH CROSSED-TAIL
0xAB4A, // (ꭊ) LATIN SMALL LETTER DOUBLE R WITH CROSSED-TAIL
0xAB4B, // (ꭋ) LATIN SMALL LETTER SCRIPT R
0xAB4C, // (ꭌ) LATIN SMALL LETTER SCRIPT R WITH RING
// esh
0x1AA, // (ƪ) LATIN LETTER REVERSED ESH LOOP
0x283, // (ʃ) LATIN SMALL LETTER ESH
0x284, // (ʄ) LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK
0x285, // (ʅ) LATIN SMALL LETTER SQUAT REVERSED ESH
0x286, // (ʆ) LATIN SMALL LETTER ESH WITH CURL
0x2A7, // (ʧ) LATIN SMALL LETTER TESH DIGRAPH
0xAB4D, // (ꭍ) LATIN SMALL LETTER BASELINE ESH
0x1D8B, // (ᶋ) LATIN SMALL LETTER ESH WITH PALATAL HOOK
0x1D98, // (ᶘ) LATIN SMALL LETTER ESH WITH RETROFLEX HOOK
0x1DF0B, // (𝼋) LATIN SMALL LETTER ESH WITH DOUBLE BAR
0x1DF0C, // (𝼌) LATIN SMALL LETTER ESH WITH DOUBLE BAR AND CURL
0x1DF17, // (𝼗) LATIN SMALL LETTER TESH DIGRAPH WITH PALATAL HOOK
0x1DF1C, // (𝼜) LATIN SMALL LETTER TESH DIGRAPH WITH RETROFLEX HOOK
// eng
0x14B, // (ŋ) LATIN SMALL LETTER ENG
0x267, // (ɧ) LATIN SMALL LETTER HENG WITH HOOK
0x2A9, // (ʩ) LATIN SMALL LETTER FENG DIGRAPH
0xA727, // (ꜧ) LATIN SMALL LETTER HENG
0xAB3C, // (ꬼ) LATIN SMALL LETTER ENG WITH CROSSED-TAIL
0x1DF14, // (𝼔) LATIN SMALL LETTER ENG WITH PALATAL HOOK
0x1DF00, // (𝼀) LATIN SMALL LETTER FENG DIGRAPH WITH TRILL
0x1DF07, // (𝼇) LATIN SMALL LETTER REVERSED ENG
// subscript
0x2090, // (ₐ) LATIN SUBSCRIPT SMALL LETTER A
0x2091, // (ₑ) LATIN SUBSCRIPT SMALL LETTER E
0x2095, // (ₕ) LATIN SUBSCRIPT SMALL LETTER H
0x1D62, // (ᵢ) LATIN SUBSCRIPT SMALL LETTER I
0x2C7C, // (ⱼ) LATIN SUBSCRIPT SMALL LETTER J
0x2096, // (ₖ) LATIN SUBSCRIPT SMALL LETTER K
0x2097, // (ₗ) LATIN SUBSCRIPT SMALL LETTER L
0x2098, // (ₘ) LATIN SUBSCRIPT SMALL LETTER M
0x2099, // (ₙ) LATIN SUBSCRIPT SMALL LETTER N
0x2092, // (ₒ) LATIN SUBSCRIPT SMALL LETTER O
0x209A, // (ₚ) LATIN SUBSCRIPT SMALL LETTER P
0x1D63, // (ᵣ) LATIN SUBSCRIPT SMALL LETTER R
0x209B, // (ₛ) LATIN SUBSCRIPT SMALL LETTER S
0x209C, // (ₜ) LATIN SUBSCRIPT SMALL LETTER T
0x1D64, // (ᵤ) LATIN SUBSCRIPT SMALL LETTER U
0x1D65, // (ᵥ) LATIN SUBSCRIPT SMALL LETTER V
0x2093, // (ₓ) LATIN SUBSCRIPT SMALL LETTER X
//0x2094, // (ₔ) LATIN SUBSCRIPT SMALL LETTER SCHWA
0x1D66, // (ᵦ) GREEK SUBSCRIPT SMALL LETTER BETA
0x1D67, // (ᵧ) GREEK SUBSCRIPT SMALL LETTER GAMMA
0x1D68, // (ᵨ) GREEK SUBSCRIPT SMALL LETTER RHO
0x1D69, // (ᵩ) GREEK SUBSCRIPT SMALL LETTER PHI
0x1D6A, // (ᵪ) GREEK SUBSCRIPT SMALL LETTER CHI
0x1FBE, // (ι) GREEK PROSGEGRAMMENI
0x1E051, // (𞁑) CYRILLIC SUBSCRIPT SMALL LETTER A
0x1E052, // (𞁒) CYRILLIC SUBSCRIPT SMALL LETTER BE
0x1E053, // (𞁓) CYRILLIC SUBSCRIPT SMALL LETTER VE
0x1E054, // (𞁔) CYRILLIC SUBSCRIPT SMALL LETTER GHE
0x1E055, // (𞁕) CYRILLIC SUBSCRIPT SMALL LETTER DE
0x1E056, // (𞁖) CYRILLIC SUBSCRIPT SMALL LETTER IE
0x1E057, // (𞁗) CYRILLIC SUBSCRIPT SMALL LETTER ZHE
0x1E058, // (𞁘) CYRILLIC SUBSCRIPT SMALL LETTER ZE
0x1E059, // (𞁙) CYRILLIC SUBSCRIPT SMALL LETTER I
0x1E05A, // (𞁚) CYRILLIC SUBSCRIPT SMALL LETTER KA
0x1E05B, // (𞁛) CYRILLIC SUBSCRIPT SMALL LETTER EL
0x1E05C, // (𞁜) CYRILLIC SUBSCRIPT SMALL LETTER O
0x1E05D, // (𞁝) CYRILLIC SUBSCRIPT SMALL LETTER PE
0x1E05E, // (𞁞) CYRILLIC SUBSCRIPT SMALL LETTER ES
0x1E05F, // (𞁟) CYRILLIC SUBSCRIPT SMALL LETTER U
0x1E060, // (𞁠) CYRILLIC SUBSCRIPT SMALL LETTER EF
0x1E061, // (𞁡) CYRILLIC SUBSCRIPT SMALL LETTER HA
0x1E062, // (𞁢) CYRILLIC SUBSCRIPT SMALL LETTER TSE
0x1E063, // (𞁣) CYRILLIC SUBSCRIPT SMALL LETTER CHE
0x1E064, // (𞁤) CYRILLIC SUBSCRIPT SMALL LETTER SHA
0x1E065, // (𞁥) CYRILLIC SUBSCRIPT SMALL LETTER HARD SIGN
0x1E066, // (𞁦) CYRILLIC SUBSCRIPT SMALL LETTER YERU
0x1E067, // (𞁧) CYRILLIC SUBSCRIPT SMALL LETTER GHE WITH UPTURN
0x1E068, // (𞁨) CYRILLIC SUBSCRIPT SMALL LETTER BYELORUSSIAN-UKRAINIAN I
0x1E069, // (𞁩) CYRILLIC SUBSCRIPT SMALL LETTER DZE
0x1E06A, // (𞁪) CYRILLIC SUBSCRIPT SMALL LETTER DZHE
// subscript digits
0x2080, // (₀) SUBSCRIPT ZERO
0x2081, // (₁) SUBSCRIPT ONE
/*
0x2082, // (₂) SUBSCRIPT TWO
0x2083, // (₃) SUBSCRIPT THREE
0x2084, // (₄) SUBSCRIPT FOUR
0x2085, // (₅) SUBSCRIPT FIVE
0x2086, // (₆) SUBSCRIPT SIX
0x2087, // (₇) SUBSCRIPT SEVEN
0x2088, // (₈) SUBSCRIPT EIGHT
0x2089, // (₉) SUBSCRIPT NINE
*/
// superscript
0xBA, // (º) MASCULINE ORDINAL INDICATOR
0xAA, // (ª) FEMININE ORDINAL INDICATOR
0x2071, // (ⁱ) SUPERSCRIPT LATIN SMALL LETTER I
0x207F, // (ⁿ) SUPERSCRIPT LATIN SMALL LETTER N
0xFC5B, // (ﱛ) ARABIC LIGATURE THAL WITH SUPERSCRIPT ALEF ISOLATED FORM
0xFC5C, // (ﱜ) ARABIC LIGATURE REH WITH SUPERSCRIPT ALEF ISOLATED FORM
0xFC5D, // (ﱝ) ARABIC LIGATURE ALEF MAKSURA WITH SUPERSCRIPT ALEF ISOLATED FORM
0xFC63, // (ﱣ) ARABIC LIGATURE SHADDA WITH SUPERSCRIPT ALEF ISOLATED FORM
0xFC90, // (ﲐ) ARABIC LIGATURE ALEF MAKSURA WITH SUPERSCRIPT ALEF FINAL FORM
0xFCD9, // (ﳙ) ARABIC LIGATURE HEH WITH SUPERSCRIPT ALEF INITIAL FORM
0x671, // (ٱ) ARABIC LETTER ALEF WASLA
0x672, // (ٲ) ARABIC LETTER ALEF WITH WAVY HAMZA ABOVE
0x673, // (ٳ) ARABIC LETTER ALEF WITH WAVY HAMZA BELOW
0x674, // (ٴ) ARABIC LETTER HIGH HAMZA
0x675, // (ٵ) ARABIC LETTER HIGH HAMZA ALEF
0x676, // (ٶ) ARABIC LETTER HIGH HAMZA WAW
0x677, // (ٷ) ARABIC LETTER U WITH HAMZA ABOVE
0x678, // (ٸ) ARABIC LETTER HIGH HAMZA YEH
// superscript digits
0x2070, // (⁰) SUPERSCRIPT ZERO
0xB9, // (¹) SUPERSCRIPT ONE
/*
0xB2, // (²) SUPERSCRIPT TWO
0xB3, // (³) SUPERSCRIPT THREE
0x2074, // (⁴) SUPERSCRIPT FOUR
0x2075, // (⁵) SUPERSCRIPT FIVE
0x2076, // (⁶) SUPERSCRIPT SIX
0x2077, // (⁷) SUPERSCRIPT SEVEN
0x2078, // (⁸) SUPERSCRIPT EIGHT
0x2079, // (⁹) SUPERSCRIPT NINE
*/
// 20230916: these were added in Unicode 15.1
// but are disabled (and likely should be disabled)
// added here for future reference
/*
0x2FFC, // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM RIGHT
0x2FFD, // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER RIGHT
0x2FFE, // IDEOGRAPHIC DESCRIPTION CHARACTER HORIZONTAL REFLECTION
0x2FFF, // IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION
0x31EF, // IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION
*/
// cjk
0x302A, // (◌〪) IDEOGRAPHIC LEVEL TONE MARK
0x302B, // (◌〫) IDEOGRAPHIC RISING TONE MARK
0x302C, // (◌〬) IDEOGRAPHIC DEPARTING TONE MARK
0x302D, // (◌〭) IDEOGRAPHIC ENTERING TONE MARK
0x302E, // (◌〮) HANGUL SINGLE DOT TONE MARK
0x302F, // (◌〯) HANGUL DOUBLE DOT TONE MARK
0x3031, // (〱) VERTICAL KANA REPEAT MARK
0x3032, // (〲) VERTICAL KANA REPEAT WITH VOICED SOUND MARK
0x3033, // (〳) VERTICAL KANA REPEAT MARK UPPER HALF
0x3034, // (〴) VERTICAL KANA REPEAT WITH VOICED SOUND MARK UPPER HALF
0x3035, // (〵) VERTICAL KANA REPEAT MARK LOWER HALF
0x3037, // (〷) IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL
0x3005, // (々) IDEOGRAPHIC ITERATION MARK
0x3006, // (〆) IDEOGRAPHIC CLOSING MARK
0x303B, // (〻) VERTICAL IDEOGRAPHIC ITERATION MARK
0x303C, // (〼) MASU MARK
0x303E, // (〾) IDEOGRAPHIC VARIATION INDICATOR
0x303F, // (〿) IDEOGRAPHIC HALF FILL SPACE
0x3190, // (㆐) IDEOGRAPHIC ANNOTATION LINKING MARK
0x3191, // (㆑) IDEOGRAPHIC ANNOTATION REVERSE MARK
// italics
0xFE45, // (﹅) SESAME DOT
0xFE46, // (﹆) WHITE SESAME DOT
// hebrew
//0x5BE, // (־) HEBREW PUNCTUATION MAQAF (Hyphen)
0x5C3, // (׃) HEBREW PUNCTUATION SOF PASUQ
0x5C6, // (׆) HEBREW PUNCTUATION NUN HAFUKHA
0x5C0, // (׀) HEBREW PUNCTUATION PASEQ // these could be enabled
//0x5F3, // (׳) HEBREW PUNCTUATION GERESH (Period, Number Mark) // since restricted using
0x5F4, // (״) HEBREW PUNCTUATION GERSHAYIM (End of Acronym) // apostrophe-like rule
// greek
0x371, // (ͱ) GREEK SMALL LETTER HETA (Ancient)
0x373, // (ͳ) GREEK SMALL LETTER ARCHAIC SAMPI (Ancient)
0x374, // (ʹ) GREEK NUMERAL SIGN
0x375, // (͵) GREEK LOWER NUMERAL SIGN
0x37B, // (ͻ) GREEK SMALL REVERSED LUNATE SIGMA SYMBOL
0x37C, // (ͼ) GREEK SMALL DOTTED LUNATE SIGMA SYMBOL
0x37D, // (ͽ) GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL
0x3D7, // (ϗ) GREEK KAI SYMBOL (&)
0x3F6, // (϶) GREEK REVERSED LUNATE EPSILON SYMBOL
0x3F8, // (ϸ) GREEK SMALL LETTER SHO
0x3FB, // (ϻ) GREEK SMALL LETTER SAN (Ancient)
0x3FC, // (ϼ) GREEK RHO WITH STROKE SYMBOL
0x3D9, // (ϙ) GREEK SMALL LETTER ARCHAIC KOPPA (Ancient)
0x3DB, // (ϛ) GREEK SMALL LETTER STIGMA (Ancient, Ligature) (20230110: used for 6 numeral? 12345 are confused)
0x3DD, // (ϝ) GREEK SMALL LETTER DIGAMMA (Ancient)
0x3E1, // (ϡ) GREEK SMALL LETTER SAMPI
0x377, // (ͷ) GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
0x3DF, // (ϟ) GREEK SMALL LETTER KOPPA
0x3F3, // (ϳ) GREEK LETTER YOT
// cyrillic (unsure)
/*
0x497, // (җ) CYRILLIC SMALL LETTER ZHE WITH DESCENDER
0x49B, // (қ) CYRILLIC SMALL LETTER KA WITH DESCENDER
0x49D, // (ҝ) CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE
0x4A3, // (ң) CYRILLIC SMALL LETTER EN WITH DESCENDER
0x4B3, // (ҳ) CYRILLIC SMALL LETTER HA WITH DESCENDER
0x446, // (ц) CYRILLIC SMALL LETTER TSE
0x449, // (щ) CYRILLIC SMALL LETTER SHCHA
0x4B7, // (ҷ) CYRILLIC SMALL LETTER CHE WITH DESCENDER
0x4B9, // (ҹ) CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE
0x527, // (ԧ) CYRILLIC SMALL LETTER SHHA WITH DESCENDER
*/
// cyrillic (obsolete)
0x461, // (ѡ) CYRILLIC SMALL LETTER OMEGA
0x463, // (ѣ) CYRILLIC SMALL LETTER YAT
0x465, // (ѥ) CYRILLIC SMALL LETTER IOTIFIED E
0x467, // (ѧ) CYRILLIC SMALL LETTER LITTLE YUS
0x469, // (ѩ) CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS
0x46B, // (ѫ) CYRILLIC SMALL LETTER BIG YUS
0x46D, // (ѭ) CYRILLIC SMALL LETTER IOTIFIED BIG YUS
0x46F, // (ѯ) CYRILLIC SMALL LETTER KSI
0x471, // (ѱ) CYRILLIC SMALL LETTER PSI
0x473, // (ѳ) CYRILLIC SMALL LETTER FITA
0x475, // (ѵ) CYRILLIC SMALL LETTER IZHITSA
0x479, // (ѹ) CYRILLIC SMALL LETTER UK
0x47B, // (ѻ) CYRILLIC SMALL LETTER ROUND OMEGA
0x47D, // (ѽ) CYRILLIC SMALL LETTER OMEGA WITH TITLO
0x47F, // (ѿ) CYRILLIC SMALL LETTER OT
0x481, // (ҁ) CYRILLIC SMALL LETTER KOPPA
0x482, // (҂) CYRILLIC THOUSANDS SIGN
0x501, // (ԁ) CYRILLIC SMALL LETTER KOMI DE
0x503, // (ԃ) CYRILLIC SMALL LETTER KOMI DJE
0x505, // (ԅ) CYRILLIC SMALL LETTER KOMI ZJE
0x507, // (ԇ) CYRILLIC SMALL LETTER KOMI DZJE
0x509, // (ԉ) CYRILLIC SMALL LETTER KOMI LJE
0x50B, // (ԋ) CYRILLIC SMALL LETTER KOMI NJE
0x50D, // (ԍ) CYRILLIC SMALL LETTER KOMI SJE
0x50F, // (ԏ) CYRILLIC SMALL LETTER KOMI TJE
0x52B, // (ԫ) CYRILLIC SMALL LETTER DZZHE
0x52D, // (ԭ) CYRILLIC SMALL LETTER DCHE
0xA641, // (ꙁ) CYRILLIC SMALL LETTER ZEMLYA
0xA643, // (ꙃ) CYRILLIC SMALL LETTER DZELO
0xA645, // (ꙅ) CYRILLIC SMALL LETTER REVERSED DZE
0xA647, // (ꙇ) CYRILLIC SMALL LETTER IOTA
0xA649, // (ꙉ) CYRILLIC SMALL LETTER DJERV
0xA64B, // (ꙋ) CYRILLIC SMALL LETTER MONOGRAPH UK
0xA64D, // (ꙍ) CYRILLIC SMALL LETTER BROAD OMEGA
0xA64F, // (ꙏ) CYRILLIC SMALL LETTER NEUTRAL YER
0xA651, // (ꙑ) CYRILLIC SMALL LETTER YERU WITH BACK YER
0xA653, // (ꙓ) CYRILLIC SMALL LETTER IOTIFIED YAT
0xA655, // (ꙕ) CYRILLIC SMALL LETTER REVERSED YU
0xA657, // (ꙗ) CYRILLIC SMALL LETTER IOTIFIED A
0xA659, // (ꙙ) CYRILLIC SMALL LETTER CLOSED LITTLE YUS
0xA65B, // (ꙛ) CYRILLIC SMALL LETTER BLENDED YUS
0xA65D, // (ꙝ) CYRILLIC SMALL LETTER IOTIFIED CLOSED LITTLE YUS
0xA65F, // (ꙟ) CYRILLIC SMALL LETTER YN
0xA661, // (ꙡ) CYRILLIC SMALL LETTER REVERSED TSE
0xA663, // (ꙣ) CYRILLIC SMALL LETTER SOFT DE
0xA665, // (ꙥ) CYRILLIC SMALL LETTER SOFT EL
0xA667, // (ꙧ) CYRILLIC SMALL LETTER SOFT EM
0xA669, // (ꙩ) CYRILLIC SMALL LETTER MONOCULAR O
0xA66B, // (ꙫ) CYRILLIC SMALL LETTER BINOCULAR O
0xA66D, // (ꙭ) CYRILLIC SMALL LETTER DOUBLE MONOCULAR O
0xA66E, // (ꙮ) CYRILLIC LETTER MULTIOCULAR O
0xA681, // (ꚁ) CYRILLIC SMALL LETTER DWE
0xA683, // (ꚃ) CYRILLIC SMALL LETTER DZWE
0xA685, // (ꚅ) CYRILLIC SMALL LETTER ZHWE
0xA687, // (ꚇ) CYRILLIC SMALL LETTER CCHE
0xA689, // (ꚉ) CYRILLIC SMALL LETTER DZZE
0xA68B, // (ꚋ) CYRILLIC SMALL LETTER TE WITH MIDDLE HOOK
0xA68D, // (ꚍ) CYRILLIC SMALL LETTER TWE
0xA68F, // (ꚏ) CYRILLIC SMALL LETTER TSWE
0xA691, // (ꚑ) CYRILLIC SMALL LETTER TSSE
0xA693, // (ꚓ) CYRILLIC SMALL LETTER TCHE
0xA695, // (ꚕ) CYRILLIC SMALL LETTER HWE
0xA697, // (ꚗ) CYRILLIC SMALL LETTER SHWE
0xA699, // (ꚙ) CYRILLIC SMALL LETTER DOUBLE O
0xA69B, // (ꚛ) CYRILLIC SMALL LETTER CROSSED O
// combining marks
// https://discuss.ens.domains/t/ens-name-normalization/8652/391
// (these can be enabled later if they're needed)
// 20221018: these are now whitelisted,
// remaining marks are limited to 1 on non-whitelisted scripts
//0x300, // (◌̀) COMBINING GRAVE ACCENT
//0x301, // (◌́) COMBINING ACUTE ACCENT
//0x302, // (◌̂) COMBINING CIRCUMFLEX ACCENT
//0x303, // (◌̃) COMBINING TILDE
//0x304, // (◌̄) COMBINING MACRON
//0x305, // (◌̅) COMBINING OVERLINE
//0x306, // (◌̆) COMBINING BREVE
//0x307, // (◌̇) COMBINING DOT ABOVE
//0x308, // (◌̈) COMBINING DIAERESIS
//0x309, // (◌̉) COMBINING HOOK ABOVE
//0x30A, // (◌̊) COMBINING RING ABOVE
//0x30B, // (◌̋) COMBINING DOUBLE ACUTE ACCENT
//0x30C, // (◌̌) COMBINING CARON
0x30D, // (◌̍) COMBINING VERTICAL LINE ABOVE
0x30E, // (◌̎) COMBINING DOUBLE VERTICAL LINE ABOVE
0x30F, // (◌̏) COMBINING DOUBLE GRAVE ACCENT
0x310, // (◌̐) COMBINING CANDRABINDU
0x311, // (◌̑) COMBINING INVERTED BREVE
0x312, // (◌̒) COMBINING TURNED COMMA ABOVE