From 7b9c85677f353eae0ee4cf4b61520671d0f1b3a5 Mon Sep 17 00:00:00 2001 From: Maintenance App Date: Tue, 19 Sep 2023 21:29:24 -0500 Subject: [PATCH] Update Unicode to version 15.1.0 This is an automated commit created by the Maintenance project https://github.com/eksperimental/maintenance Before merging, please read the release notes by visiting https://www.unicode.org/versions/Unicode15.1.0/ and assess if additional changes are necessary in the code base. --- lib/elixir/lib/string.ex | 2 +- lib/elixir/unicode/IdentifierType.txt | 13 ++-- lib/elixir/unicode/PropList.txt | 78 ++++++++++++++++++--- lib/elixir/unicode/PropertyValueAliases.txt | 38 +++++++++- lib/elixir/unicode/ScriptExtensions.txt | 47 +++++++------ lib/elixir/unicode/Scripts.txt | 14 ++-- lib/elixir/unicode/SpecialCasing.txt | 6 +- lib/elixir/unicode/UnicodeData.txt | 7 ++ lib/elixir/unicode/confusables.txt | 10 +-- lib/elixir/unicode/unicode.ex | 2 +- 10 files changed, 164 insertions(+), 53 deletions(-) diff --git a/lib/elixir/lib/string.ex b/lib/elixir/lib/string.ex index fce586bf70c..c5c15142aaf 100644 --- a/lib/elixir/lib/string.ex +++ b/lib/elixir/lib/string.ex @@ -18,7 +18,7 @@ defmodule String do "hello world" The functions in this module act according to - [The Unicode Standard, Version 15.0.0](http://www.unicode.org/versions/Unicode15.0.0/). + [The Unicode Standard, Version 15.1.0](http://www.unicode.org/versions/Unicode15.1.0/). ## Interpolation diff --git a/lib/elixir/unicode/IdentifierType.txt b/lib/elixir/unicode/IdentifierType.txt index f0293ac88ee..695156e6ad7 100644 --- a/lib/elixir/unicode/IdentifierType.txt +++ b/lib/elixir/unicode/IdentifierType.txt @@ -1,11 +1,11 @@ # IdentifierType.txt -# Date: 2022-08-26, 16:49:09 GMT -# © 2022 Unicode®, Inc. +# Date: 2023-08-11, 17:46:40 GMT +# © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html # # Unicode Security Mechanisms for UTS #39 -# Version: 15.0.0 +# Version: 15.1.0 # # For documentation and usage, see https://www.unicode.org/reports/tr39 # @@ -576,10 +576,11 @@ FA27..FA29 ; Recommended # 1.1 [3] CJK COMPATIBILITY ID 2B740..2B81D ; Recommended # 6.0 [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; Recommended # 8.0 [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2CEB0..2EBE0 ; Recommended # 10.0 [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2EBF0..2EE5D ; Recommended # 15.1 [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 30000..3134A ; Recommended # 13.0 [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Recommended # 15.0 [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 112139 +# Total code points: 112761 # Identifier_Type: Inclusion @@ -1892,6 +1893,7 @@ A8F8..A8FA ; Obsolete Not_XID # 5.2 [3] DEVANAGARI SIGN PUSH 2E9B..2E9E ; Not_XID # 3.0 [4] CJK RADICAL CHOKE..CJK RADICAL DEATH 2EA0..2EF2 ; Not_XID # 3.0 [83] CJK RADICAL CIVILIAN..CJK RADICAL J-SIMPLIFIED TURTLE 2FF0..2FFB ; Not_XID # 3.0 [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID +2FFC..2FFF ; Not_XID # 15.1 [4] IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION 3001..3004 ; Not_XID # 1.1 [4] IDEOGRAPHIC COMMA..JAPANESE INDUSTRIAL STANDARD SYMBOL 3008..301D ; Not_XID # 1.1 [22] LEFT ANGLE BRACKET..REVERSED DOUBLE PRIME QUOTATION MARK 301F..3020 ; Not_XID # 1.1 [2] LOW DOUBLE PRIME QUOTATION MARK..POSTAL MARK FACE @@ -1903,6 +1905,7 @@ A8F8..A8FA ; Obsolete Not_XID # 5.2 [3] DEVANAGARI SIGN PUSH 3190..3191 ; Not_XID # 1.1 [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK 31C0..31CF ; Not_XID # 4.1 [16] CJK STROKE T..CJK STROKE N 31D0..31E3 ; Not_XID # 5.1 [20] 
CJK STROKE H..CJK STROKE Q +31EF ; Not_XID # 15.1 IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION 3248..324F ; Not_XID # 5.2 [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE A67E ; Not_XID # 5.1 CYRILLIC KAVYKA A720..A721 ; Not_XID # 5.0 [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE @@ -2136,7 +2139,7 @@ FFFD ; Not_XID # 1.1 REPLACEMENT CHARACTE 1FB00..1FB92 ; Not_XID # 13.0 [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBCA ; Not_XID # 13.0 [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON -# Total code points: 5699 +# Total code points: 5704 # Identifier_Type: Not_NFKC diff --git a/lib/elixir/unicode/PropList.txt b/lib/elixir/unicode/PropList.txt index b49d6460c16..777e8a28818 100644 --- a/lib/elixir/unicode/PropList.txt +++ b/lib/elixir/unicode/PropList.txt @@ -1,6 +1,6 @@ -# PropList-15.0.0.txt -# Date: 2022-08-05, 22:17:16 GMT -# © 2022 Unicode®, Inc. +# PropList-15.1.0.txt +# Date: 2023-08-01, 21:56:53 GMT +# © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use, see https://www.unicode.org/terms_of_use.html # @@ -856,11 +856,12 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 2B740..2B81D ; Ideographic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; Ideographic # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2CEB0..2EBE0 ; Ideographic # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2EBF0..2EE5D ; Ideographic # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Ideographic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 105854 +# Total code points: 106476 # ================================================ @@ -1241,9 +1242,10 @@ E0020..E007F ; Other_Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG # ================================================ 2FF0..2FF1 ; IDS_Binary_Operator # So [2] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO BELOW -2FF4..2FFB ; IDS_Binary_Operator # So [8] IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID +2FF4..2FFD ; IDS_Binary_Operator # So [10] IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND..IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER RIGHT +31EF ; IDS_Binary_Operator # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION -# Total code points: 10 +# Total code points: 13 # ================================================ @@ -1253,6 +1255,12 @@ E0020..E007F ; Other_Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG # ================================================ +2FFE..2FFF ; IDS_Unary_Operator # So [2] IDEOGRAPHIC DESCRIPTION CHARACTER HORIZONTAL REFLECTION..IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION + +# 
Total code points: 2 + +# ================================================ + 2E80..2E99 ; Radical # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP 2E9B..2EF3 ; Radical # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE 2F00..2FD5 ; Radical # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE @@ -1275,10 +1283,11 @@ FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..C 2B740..2B81D ; Unified_Ideograph # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; Unified_Ideograph # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2CEB0..2EBE0 ; Unified_Ideograph # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2EBF0..2EE5D ; Unified_Ideograph # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 30000..3134A ; Unified_Ideograph # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Unified_Ideograph # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 97058 +# Total code points: 97680 # ================================================ @@ -1376,8 +1385,58 @@ AABB..AABC ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL AUE..TAI VIET 0387 ; Other_ID_Continue # Po GREEK ANO TELEIA 1369..1371 ; Other_ID_Continue # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE 19DA ; Other_ID_Continue # No NEW TAI LUE THAM DIGIT ONE +200C..200D ; Other_ID_Continue # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER +30FB ; Other_ID_Continue # Po KATAKANA MIDDLE DOT +FF65 ; Other_ID_Continue # Po HALFWIDTH KATAKANA MIDDLE DOT -# Total code points: 12 +# Total code points: 16 + +# ================================================ + +00B2..00B3 ; ID_Compat_Math_Continue # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE +00B9 ; ID_Compat_Math_Continue # No SUPERSCRIPT ONE +2070 ; ID_Compat_Math_Continue # No SUPERSCRIPT ZERO +2074..2079 ; ID_Compat_Math_Continue # No [6] SUPERSCRIPT FOUR..SUPERSCRIPT NINE +207A..207C ; 
ID_Compat_Math_Continue # Sm [3] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT EQUALS SIGN +207D ; ID_Compat_Math_Continue # Ps SUPERSCRIPT LEFT PARENTHESIS +207E ; ID_Compat_Math_Continue # Pe SUPERSCRIPT RIGHT PARENTHESIS +2080..2089 ; ID_Compat_Math_Continue # No [10] SUBSCRIPT ZERO..SUBSCRIPT NINE +208A..208C ; ID_Compat_Math_Continue # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN +208D ; ID_Compat_Math_Continue # Ps SUBSCRIPT LEFT PARENTHESIS +208E ; ID_Compat_Math_Continue # Pe SUBSCRIPT RIGHT PARENTHESIS +2202 ; ID_Compat_Math_Continue # Sm PARTIAL DIFFERENTIAL +2207 ; ID_Compat_Math_Continue # Sm NABLA +221E ; ID_Compat_Math_Continue # Sm INFINITY +1D6C1 ; ID_Compat_Math_Continue # Sm MATHEMATICAL BOLD NABLA +1D6DB ; ID_Compat_Math_Continue # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL +1D6FB ; ID_Compat_Math_Continue # Sm MATHEMATICAL ITALIC NABLA +1D715 ; ID_Compat_Math_Continue # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL +1D735 ; ID_Compat_Math_Continue # Sm MATHEMATICAL BOLD ITALIC NABLA +1D74F ; ID_Compat_Math_Continue # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL +1D76F ; ID_Compat_Math_Continue # Sm MATHEMATICAL SANS-SERIF BOLD NABLA +1D789 ; ID_Compat_Math_Continue # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL +1D7A9 ; ID_Compat_Math_Continue # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA +1D7C3 ; ID_Compat_Math_Continue # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL + +# Total code points: 43 + +# ================================================ + +2202 ; ID_Compat_Math_Start # Sm PARTIAL DIFFERENTIAL +2207 ; ID_Compat_Math_Start # Sm NABLA +221E ; ID_Compat_Math_Start # Sm INFINITY +1D6C1 ; ID_Compat_Math_Start # Sm MATHEMATICAL BOLD NABLA +1D6DB ; ID_Compat_Math_Start # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL +1D6FB ; ID_Compat_Math_Start # Sm MATHEMATICAL ITALIC NABLA +1D715 ; ID_Compat_Math_Start # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL +1D735 ; ID_Compat_Math_Start # Sm MATHEMATICAL BOLD ITALIC NABLA +1D74F ; 
ID_Compat_Math_Start # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL +1D76F ; ID_Compat_Math_Start # Sm MATHEMATICAL SANS-SERIF BOLD NABLA +1D789 ; ID_Compat_Math_Start # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL +1D7A9 ; ID_Compat_Math_Start # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA +1D7C3 ; ID_Compat_Math_Start # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL + +# Total code points: 13 # ================================================ @@ -1398,6 +1457,7 @@ AABB..AABC ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL AUE..TAI VIET 1367..1368 ; Sentence_Terminal # Po [2] ETHIOPIC QUESTION MARK..ETHIOPIC PARAGRAPH SEPARATOR 166E ; Sentence_Terminal # Po CANADIAN SYLLABICS FULL STOP 1735..1736 ; Sentence_Terminal # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION +17D4..17D5 ; Sentence_Terminal # Po [2] KHMER SIGN KHAN..KHMER SIGN BARIYOOSAN 1803 ; Sentence_Terminal # Po MONGOLIAN FULL STOP 1809 ; Sentence_Terminal # Po MONGOLIAN MANCHU FULL STOP 1944..1945 ; Sentence_Terminal # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK @@ -1462,7 +1522,7 @@ FF61 ; Sentence_Terminal # Po HALFWIDTH IDEOGRAPHIC FULL STOP 1BC9F ; Sentence_Terminal # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1DA88 ; Sentence_Terminal # Po SIGNWRITING FULL STOP -# Total code points: 154 +# Total code points: 156 # ================================================ diff --git a/lib/elixir/unicode/PropertyValueAliases.txt b/lib/elixir/unicode/PropertyValueAliases.txt index 863301b1e19..240cd28c4cc 100644 --- a/lib/elixir/unicode/PropertyValueAliases.txt +++ b/lib/elixir/unicode/PropertyValueAliases.txt @@ -1,6 +1,6 @@ -# PropertyValueAliases-15.0.0.txt -# Date: 2022-08-05, 23:42:17 GMT -# © 2022 Unicode®, Inc. +# PropertyValueAliases-15.1.0.txt +# Date: 2023-08-07, 15:21:34 GMT +# © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use, see https://www.unicode.org/terms_of_use.html # @@ -91,6 +91,7 @@ age; 12.1 ; V12_1 age; 13.0 ; V13_0 age; 14.0 ; V14_0 age; 15.0 ; V15_0 +age; 15.1 ; V15_1 age; NA ; Unassigned # Alphabetic (Alpha) @@ -208,6 +209,7 @@ blk; CJK_Ext_E ; CJK_Unified_Ideographs_Extension_E blk; CJK_Ext_F ; CJK_Unified_Ideographs_Extension_F blk; CJK_Ext_G ; CJK_Unified_Ideographs_Extension_G blk; CJK_Ext_H ; CJK_Unified_Ideographs_Extension_H +blk; CJK_Ext_I ; CJK_Unified_Ideographs_Extension_I blk; CJK_Radicals_Sup ; CJK_Radicals_Supplement blk; CJK_Strokes ; CJK_Strokes blk; CJK_Symbols ; CJK_Symbols_And_Punctuation @@ -817,6 +819,21 @@ IDSB; Y ; Yes ; T IDST; N ; No ; F ; False IDST; Y ; Yes ; T ; True +# IDS_Unary_Operator (IDSU) + +IDSU; N ; No ; F ; False +IDSU; Y ; Yes ; T ; True + +# ID_Compat_Math_Continue (ID_Compat_Math_Continue) + +ID_Compat_Math_Continue; N ; No ; F ; False +ID_Compat_Math_Continue; Y ; Yes ; T ; True + +# ID_Compat_Math_Start (ID_Compat_Math_Start) + +ID_Compat_Math_Start; N ; No ; F ; False +ID_Compat_Math_Start; Y ; Yes ; T ; True + # ID_Continue (IDC) IDC; N ; No ; F ; False @@ -836,6 +853,13 @@ IDS; Y ; Yes ; T Ideo; N ; No ; F ; False Ideo; Y ; Yes ; T ; True +# Indic_Conjunct_Break (InCB) + +InCB; Consonant ; Consonant +InCB; Extend ; Extend +InCB; Linker ; Linker +InCB; None ; None + # Indic_Positional_Category (InPC) InPC; Bottom ; Bottom @@ -1074,7 +1098,10 @@ jt ; U ; Non_Joining # Line_Break (lb) lb ; AI ; Ambiguous +lb ; AK ; Aksara lb ; AL ; Alphabetic +lb ; AP ; Aksara_Prebase +lb ; AS ; Aksara_Start lb ; B2 ; Break_Both lb ; BA ; Break_After lb ; BB ; Break_Before @@ -1112,6 +1139,8 @@ lb ; SA ; Complex_Context lb ; SG ; Surrogate lb ; SP ; Space lb ; SY ; Break_Symbols +lb ; VF ; Virama_Final +lb ; VI ; Virama lb ; WJ ; Word_Joiner lb ; XX ; Unknown lb ; ZW ; ZWSpace @@ -1156,6 +1185,9 @@ NFKC_QC; M ; Maybe NFKC_QC; N ; No NFKC_QC; Y ; Yes +# NFKC_Simple_Casefold (NFKC_SCF) + + # NFKD_Quick_Check (NFKD_QC) NFKD_QC; N ; 
No diff --git a/lib/elixir/unicode/ScriptExtensions.txt b/lib/elixir/unicode/ScriptExtensions.txt index 2f5a1727e33..23141fb8241 100644 --- a/lib/elixir/unicode/ScriptExtensions.txt +++ b/lib/elixir/unicode/ScriptExtensions.txt @@ -1,6 +1,6 @@ -# ScriptExtensions-15.0.0.txt -# Date: 2022-02-02, 00:57:11 GMT -# © 2022 Unicode®, Inc. +# ScriptExtensions-15.1.0.txt +# Date: 2023-02-01, 23:02:24 GMT +# © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html # @@ -136,20 +136,20 @@ # ================================================ -# Script_Extensions=Arab Rohg +# Script_Extensions=Arab Nkoo -06D4 ; Arab Rohg # Po ARABIC FULL STOP +FD3E ; Arab Nkoo # Pe ORNATE LEFT PARENTHESIS +FD3F ; Arab Nkoo # Ps ORNATE RIGHT PARENTHESIS -# Total code points: 1 +# Total code points: 2 # ================================================ -# Script_Extensions=Arab Nkoo +# Script_Extensions=Arab Rohg -FD3E ; Arab Nkoo # Pe ORNATE LEFT PARENTHESIS -FD3F ; Arab Nkoo # Ps ORNATE RIGHT PARENTHESIS +06D4 ; Arab Rohg # Po ARABIC FULL STOP -# Total code points: 2 +# Total code points: 1 # ================================================ @@ -553,17 +553,17 @@ FF64..FF65 ; Bopo Hang Hani Hira Kana Yiii # Po [2] HALFWIDTH IDEOGRAPHIC C # ================================================ -# Script_Extensions=Beng Deva Gran Knda Nand Orya Telu Tirh +# Script_Extensions=Adlm Arab Mand Mani Ougr Phlp Rohg Sogd Syrc -1CF2 ; Beng Deva Gran Knda Nand Orya Telu Tirh # Lo VEDIC SIGN ARDHAVISARGA +0640 ; Adlm Arab Mand Mani Ougr Phlp Rohg Sogd Syrc # Lm ARABIC TATWEEL # Total code points: 1 # ================================================ -# Script_Extensions=Adlm Arab Mand Mani Ougr Phlp Rohg Sogd Syrc +# Script_Extensions=Beng Deva Gran Knda Mlym Nand Orya Sinh Telu Tirh -0640 ; Adlm Arab Mand Mani Ougr Phlp Rohg Sogd Syrc # Lm ARABIC TATWEEL +1CF2 ; Beng Deva Gran 
Knda Mlym Nand Orya Sinh Telu Tirh # Lo VEDIC SIGN ARDHAVISARGA # Total code points: 1 @@ -572,10 +572,9 @@ FF64..FF65 ; Bopo Hang Hani Hira Kana Yiii # Po [2] HALFWIDTH IDEOGRAPHIC C # Script_Extensions=Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh A836..A837 ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK -A838 ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh # Sc NORTH INDIC RUPEE MARK A839 ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh # So NORTH INDIC QUANTITY MARK -# Total code points: 4 +# Total code points: 3 # ================================================ @@ -587,6 +586,14 @@ A839 ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh # So # ================================================ +# Script_Extensions=Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Shrd Sind Takr Tirh + +A838 ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Shrd Sind Takr Tirh # Sc NORTH INDIC RUPEE MARK + +# Total code points: 1 + +# ================================================ + # Script_Extensions=Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Shrd Taml Telu Tirh 0951 ; Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Shrd Taml Telu Tirh # Mn DEVANAGARI STRESS SIGN UDATTA @@ -595,17 +602,17 @@ A839 ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh # So # ================================================ -# Script_Extensions=Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Modi Nand Sind Takr Tirh +# Script_Extensions=Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Modi Nand Shrd Sind Takr Tirh -A833..A835 ; Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Modi Nand Sind Takr Tirh # No [3] NORTH INDIC FRACTION ONE SIXTEENTH..NORTH INDIC FRACTION THREE SIXTEENTHS +A833..A835 ; Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Modi Nand Shrd Sind Takr Tirh # No [3] NORTH INDIC FRACTION ONE SIXTEENTH..NORTH INDIC FRACTION THREE SIXTEENTHS # Total code points: 3 # ================================================ -# 
Script_Extensions=Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Mlym Modi Nand Sind Takr Tirh +# Script_Extensions=Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Mlym Modi Nand Shrd Sind Takr Tirh -A830..A832 ; Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Mlym Modi Nand Sind Takr Tirh # No [3] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE QUARTERS +A830..A832 ; Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Mlym Modi Nand Shrd Sind Takr Tirh # No [3] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE QUARTERS # Total code points: 3 diff --git a/lib/elixir/unicode/Scripts.txt b/lib/elixir/unicode/Scripts.txt index 2b138bffb88..0b3f717cb20 100644 --- a/lib/elixir/unicode/Scripts.txt +++ b/lib/elixir/unicode/Scripts.txt @@ -1,6 +1,6 @@ -# Scripts-15.0.0.txt -# Date: 2022-04-26, 23:15:02 GMT -# © 2022 Unicode®, Inc. +# Scripts-15.1.0.txt +# Date: 2023-07-28, 16:01:07 GMT +# © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use, see https://www.unicode.org/terms_of_use.html # @@ -357,7 +357,7 @@ 2E5B ; Common # Ps BOTTOM HALF LEFT PARENTHESIS 2E5C ; Common # Pe BOTTOM HALF RIGHT PARENTHESIS 2E5D ; Common # Pd OBLIQUE HYPHEN -2FF0..2FFB ; Common # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID +2FF0..2FFF ; Common # So [16] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION 3000 ; Common # Zs IDEOGRAPHIC SPACE 3001..3003 ; Common # Po [3] IDEOGRAPHIC COMMA..DITTO MARK 3004 ; Common # So JAPANESE INDUSTRIAL STANDARD SYMBOL @@ -399,6 +399,7 @@ 3192..3195 ; Common # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK 3196..319F ; Common # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK 31C0..31E3 ; Common # So [36] CJK STROKE T..CJK STROKE Q +31EF ; Common # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION 3220..3229 ; Common # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN 322A..3247 ; Common # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO 3248..324F ; Common # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE @@ -629,7 +630,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR E0001 ; Common # Cf LANGUAGE TAG E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 8301 +# Total code points: 8306 # ================================================ @@ -1593,11 +1594,12 @@ FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILI 2B740..2B81D ; Han # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; Han # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2CEB0..2EBE0 ; Han # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2EBF0..2EE5D ; Han # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; Han # Lo [542] CJK 
COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; Han # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Han # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 98408 +# Total code points: 99030 # ================================================ diff --git a/lib/elixir/unicode/SpecialCasing.txt b/lib/elixir/unicode/SpecialCasing.txt index 08d04fa9421..de08450a6b9 100644 --- a/lib/elixir/unicode/SpecialCasing.txt +++ b/lib/elixir/unicode/SpecialCasing.txt @@ -1,6 +1,6 @@ -# SpecialCasing-15.0.0.txt -# Date: 2022-02-02, 23:35:52 GMT -# © 2022 Unicode®, Inc. +# SpecialCasing-15.1.0.txt +# Date: 2023-01-05, 20:35:03 GMT +# © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html # diff --git a/lib/elixir/unicode/UnicodeData.txt b/lib/elixir/unicode/UnicodeData.txt index ea963a7162c..bdcc41850d7 100644 --- a/lib/elixir/unicode/UnicodeData.txt +++ b/lib/elixir/unicode/UnicodeData.txt @@ -11231,6 +11231,10 @@ 2FF9;IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM UPPER RIGHT;So;0;ON;;;;;N;;;;; 2FFA;IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER LEFT;So;0;ON;;;;;N;;;;; 2FFB;IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID;So;0;ON;;;;;N;;;;; +2FFC;IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM RIGHT;So;0;ON;;;;;N;;;;; +2FFD;IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER RIGHT;So;0;ON;;;;;N;;;;; +2FFE;IDEOGRAPHIC DESCRIPTION CHARACTER HORIZONTAL REFLECTION;So;0;ON;;;;;N;;;;; +2FFF;IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION;So;0;ON;;;;;N;;;;; 3000;IDEOGRAPHIC SPACE;Zs;0;WS; 0020;;;;N;;;;; 3001;IDEOGRAPHIC COMMA;Po;0;ON;;;;;N;;;;; 3002;IDEOGRAPHIC FULL STOP;Po;0;ON;;;;;N;IDEOGRAPHIC PERIOD;;;; @@ -11705,6 +11709,7 @@ 31E1;CJK STROKE HZZZG;So;0;ON;;;;;N;;;;; 31E2;CJK STROKE PG;So;0;ON;;;;;N;;;;; 31E3;CJK STROKE Q;So;0;ON;;;;;N;;;;; 
+31EF;IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION;So;0;ON;;;;;N;;;;; 31F0;KATAKANA LETTER SMALL KU;Lo;0;L;;;;;N;;;;; 31F1;KATAKANA LETTER SMALL SI;Lo;0;L;;;;;N;;;;; 31F2;KATAKANA LETTER SMALL SU;Lo;0;L;;;;;N;;;;; @@ -34035,6 +34040,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 2CEA1;;Lo;0;L;;;;;N;;;;; 2CEB0;;Lo;0;L;;;;;N;;;;; 2EBE0;;Lo;0;L;;;;;N;;;;; +2EBF0;;Lo;0;L;;;;;N;;;;; +2EE5D;;Lo;0;L;;;;;N;;;;; 2F800;CJK COMPATIBILITY IDEOGRAPH-2F800;Lo;0;L;4E3D;;;;N;;;;; 2F801;CJK COMPATIBILITY IDEOGRAPH-2F801;Lo;0;L;4E38;;;;N;;;;; 2F802;CJK COMPATIBILITY IDEOGRAPH-2F802;Lo;0;L;4E41;;;;N;;;;; diff --git a/lib/elixir/unicode/confusables.txt b/lib/elixir/unicode/confusables.txt index 327a5f324e3..cf73eca009c 100644 --- a/lib/elixir/unicode/confusables.txt +++ b/lib/elixir/unicode/confusables.txt @@ -1,11 +1,11 @@ # confusables.txt -# Date: 2022-08-26, 16:49:08 GMT -# © 2022 Unicode®, Inc. +# Date: 2023-08-11, 17:46:40 GMT +# © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html # # Unicode Security Mechanisms for UTS #39 -# Version: 15.0.0 +# Version: 15.1.0 # # For documentation and usage, see https://www.unicode.org/reports/tr39 # @@ -349,8 +349,8 @@ A4FA ; 002E 002E ; MA # ( ꓺ → .. 
) LISU LETTER TONE MYA CYA → FULL STOP, F A6F4 ; A6F3 A6F3 ; MA #* ( ꛴ → ꛳꛳ ) BAMUM COLON → BAMUM FULL STOP, BAMUM FULL STOP # -30FB ; 00B7 ; MA #* ( ・ → · ) KATAKANA MIDDLE DOT → MIDDLE DOT # →•→ -FF65 ; 00B7 ; MA #* ( ・ → · ) HALFWIDTH KATAKANA MIDDLE DOT → MIDDLE DOT # →•→ +30FB ; 00B7 ; MA # ( ・ → · ) KATAKANA MIDDLE DOT → MIDDLE DOT # →•→ +FF65 ; 00B7 ; MA # ( ・ → · ) HALFWIDTH KATAKANA MIDDLE DOT → MIDDLE DOT # →•→ 16EB ; 00B7 ; MA #* ( ᛫ → · ) RUNIC SINGLE PUNCTUATION → MIDDLE DOT # 0387 ; 00B7 ; MA # ( · → · ) GREEK ANO TELEIA → MIDDLE DOT # 2E31 ; 00B7 ; MA #* ( ⸱ → · ) WORD SEPARATOR MIDDLE DOT → MIDDLE DOT # diff --git a/lib/elixir/unicode/unicode.ex b/lib/elixir/unicode/unicode.ex index 7cd5c0fb80f..f43d29ae85b 100644 --- a/lib/elixir/unicode/unicode.ex +++ b/lib/elixir/unicode/unicode.ex @@ -150,7 +150,7 @@ case_ignorable_categories = :binary.compile_pattern(["Mn", "Me", "Cf", "Lm", "Sk defmodule String.Unicode do @moduledoc false - def version, do: {15, 0, 0} + def version, do: {15, 1, 0} [unconditional_mappings, _conditional_mappings] = Path.join(__DIR__, "SpecialCasing.txt")