Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tweak existing Unicode tests, add a Unicode version test #212

Merged
merged 4 commits into from
Nov 28, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 105 additions & 1 deletion tests/correct/test-catalog.xml
Original file line number Diff line number Diff line change
Expand Up @@ -608,12 +608,14 @@
<test-set name="unicode-range2">
<created by="SP" on="2021-12-21"/>
<ixml-grammar-ref href="unicode-range2.ixml"/>
<test-case name="unicode-range1">
<test-case name="unicode-range2">
<created by="SP" on="2021-12-21"/>
<modified by="MSM" on="2021-12-30"
change="supplied expected output"/>
<modified by="MSM" on="2021-12-30"
change="stripped trailing whitespace from input"/>
<modified by="MSM" on="2023-11-15"
change="rename using 2 not 1"/>
<test-string-ref href="unicode-range2.inp"/>
<result>
<assert-xml-ref href="unicode-range2.output.xml"/>
Expand All @@ -626,6 +628,11 @@
<created by="SP" on="2023-06-13"/>
<modified by="NDW" on="2023-06-15"
change="Added to the test catalog"/>
<modified by="MSM" on="2023-11-16"
change="Added dependencies to mark this as requiring Unicode 14.0 or later"/>
<dependencies Unicode-version="14.0"/>
<dependencies Unicode-version="15.0"/>
<dependencies Unicode-version="15.1"/>
<ixml-grammar-ref href="unicode-classes.ixml"/>
<test-case name="unicode-classes">
<created by="SP" on="2023-06-13"/>
Expand All @@ -638,6 +645,103 @@
</test-case>
</test-set>

<test-set name="unicode-version-check">
  <!-- Every test case below parses the same input with the same grammar.
       The dependencies element of each case names the Unicode version
       whose character-class assignments its expected result assumes. -->
  <created by="MSM" on="2023-11-15"/>
  <ixml-grammar-ref href="unicode-version-diagnostic.ixml"/>

  <!-- NOTE(review): the expected-result files for 6.0 to 9.0 are not
       visible in this change set, and their hrefs mix zero-padded (v06.1)
       and unpadded (v6.0, v7.0) spellings. Confirm each href against the
       file name actually checked in. -->
  <test-case name="unicode-v06.0-diagnostic">
    <dependencies Unicode-version="6.0"/>
    <test-string-ref href="unicode-version-diagnostic.txt"/>
    <result><assert-xml-ref href="unicode-v6.0.xml"/></result>
  </test-case>

  <test-case name="unicode-v06.1-diagnostic">
    <dependencies Unicode-version="6.1"/>
    <test-string-ref href="unicode-version-diagnostic.txt"/>
    <result><assert-xml-ref href="unicode-v06.1.xml"/></result>
  </test-case>

  <test-case name="unicode-v06.2-diagnostic">
    <dependencies Unicode-version="6.2"/>
    <test-string-ref href="unicode-version-diagnostic.txt"/>
    <result><assert-xml-ref href="unicode-v06.2.xml"/></result>
  </test-case>

  <test-case name="unicode-v06.3-diagnostic">
    <dependencies Unicode-version="6.3"/>
    <test-string-ref href="unicode-version-diagnostic.txt"/>
    <result><assert-xml-ref href="unicode-v06.3.xml"/></result>
  </test-case>

  <test-case name="unicode-v07-diagnostic">
    <dependencies Unicode-version="7.0"/>
    <test-string-ref href="unicode-version-diagnostic.txt"/>
    <result><assert-xml-ref href="unicode-v7.0.xml"/></result>
  </test-case>

  <test-case name="unicode-v08-diagnostic">
    <dependencies Unicode-version="8.0"/>
    <test-string-ref href="unicode-version-diagnostic.txt"/>
    <result><assert-xml-ref href="unicode-v08.0.xml"/></result>
  </test-case>

  <test-case name="unicode-v09-diagnostic">
    <dependencies Unicode-version="9.0"/>
    <test-string-ref href="unicode-version-diagnostic.txt"/>
    <result><assert-xml-ref href="unicode-v09.0.xml"/></result>
  </test-case>

  <!-- From 10.0 onward the hrefs use the unicode.vNN.N.xml spelling, which
       is the name of the expected-result files added with this test set. -->
  <test-case name="unicode-version-10-diagnostic">
    <dependencies Unicode-version="10.0"/>
    <test-string-ref href="unicode-version-diagnostic.txt"/>
    <result><assert-xml-ref href="unicode.v10.0.xml"/></result>
  </test-case>

  <test-case name="unicode-version-11-diagnostic">
    <dependencies Unicode-version="11.0"/>
    <test-string-ref href="unicode-version-diagnostic.txt"/>
    <result><assert-xml-ref href="unicode.v11.0.xml"/></result>
  </test-case>

  <test-case name="unicode-version-12-diagnostic">
    <dependencies Unicode-version="12.0"/>
    <test-string-ref href="unicode-version-diagnostic.txt"/>
    <result><assert-xml-ref href="unicode.v12.0.xml"/></result>
  </test-case>

  <test-case name="unicode-version-12.1-diagnostic">
    <dependencies Unicode-version="12.1"/>
    <test-string-ref href="unicode-version-diagnostic.txt"/>
    <result><assert-xml-ref href="unicode.v12.1.xml"/></result>
  </test-case>

  <test-case name="unicode-version-13-diagnostic">
    <dependencies Unicode-version="13.0"/>
    <test-string-ref href="unicode-version-diagnostic.txt"/>
    <result><assert-xml-ref href="unicode.v13.0.xml"/></result>
  </test-case>

  <test-case name="unicode-version-14-diagnostic">
    <dependencies Unicode-version="14.0"/>
    <test-string-ref href="unicode-version-diagnostic.txt"/>
    <result><assert-xml-ref href="unicode.v14.0.xml"/></result>
  </test-case>

  <test-case name="unicode-version-15-diagnostic">
    <dependencies Unicode-version="15.0"/>
    <test-string-ref href="unicode-version-diagnostic.txt"/>
    <result><assert-xml-ref href="unicode.v15.0.xml"/></result>
  </test-case>

  <test-case name="unicode-version-15.1-diagnostic">
    <dependencies Unicode-version="15.1"/>
    <test-string-ref href="unicode-version-diagnostic.txt"/>
    <result><assert-xml-ref href="unicode.v15.1.xml"/></result>
  </test-case>

</test-set>


<test-set name="vcard">
<created by="SP" on="2021-12-16"/>
<ixml-grammar-ref href="vcard.ixml"/>
Expand Down
228 changes: 228 additions & 0 deletions tests/correct/unicode-version-diagnostic.ixml
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
{ An attempt to detect which version of Unicode the ixml processor is
currently using.

The input is a sequence of the following code points, separated
for legibility by whitespace.

First, some characters whose general category value has changed:

U+00A7 Section sign (So in 6.0, Po from 6.1)
U+2308 Left ceiling (Sm in 6.0-6.2, Ps from 6.3)
U+FD3E..FD3F Ornate L/R parenthesis (Ps,Pe in 6.0-6.3, Pe,Ps from 7)
U+13DD Cherokee letter TLA (Lo 6.0-7.0, Lu from 8)
U+1886 Mongolian letter Ali Gali three Baluda (Lo, then Mn from 9)

U+10D0 Georgian small letter AN (Lo in 6-10, Ll from 11)
U+166D Canadian syllabics CHI sign (Po in 6-11, So from 12)
U+1734 Hanunoo sign PAMUDPOD (Mn in 6-13, Mc in 14-15)

Then a sequence of characters new in different versions:

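6.0 U+1F086 So Playing card six of Hearts
(NOTE: check this code point. Six of Hearts is U+1F0B6; U+1F086 is a
domino tile that has been in Unicode since 5.1. Every rule below expects
So here, so the outcome of the test is not affected.)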
6.1 U+2CF3 Ll Coptic small letter BOHAIRIC KHEI
6.2 U+20BA Sc Turkish Lira sign
6.3 adds only format control characters, omitted here
7.0 U+052B Ll Cyrillic small letter DZZHE
8.0 U+13F5 Lu Cherokee letter MV

9.0 U+1715C Lo (not named)
10.0 U+0860 Lo Syriac letter Malayalam NGA
11.0 U+0560 Ll Armenian ... turned AYB
12.0 U+0C77 Po Telugu sign SIDDHAM
12.1 U+32FF So Square era name REIWA

13.0 U+1FBF7 Nd Segmented digit seven
14.0 U+2E54 Po Medieval question mark
15.0 U+11B00 Po Devanagari head mark
15.1 U+2FFC So ideographic desc char surround from right

For the record, earlier versions of this test used:
13.0 U+08BE Lo Arabic ... PEH with small v
14.0 U+061D Po Arabic end of text mark
but I'm trying to avoid confusing debuggers with bidi.

}
{ Top-level rule: one alternative per Unicode version this grammar can
  recognize. The rule itself is marked with - so it produces no element;
  the alternative that matches supplies the output element, for example
  unicode-10.0. }
-unicode-version = unicode-6.0
| unicode-6.1
| unicode-6.2
| unicode-6.3
| unicode-7.0
| unicode-8.0
| unicode-9.0
| unicode-10.0
| unicode-11.0
| unicode-12.0
| unicode-12.1
| unicode-13.0
| unicode-14.0
| unicode-15.0
| unicode-15.1
.

{ Unicode 6.0 profile. Each terminal tests one input character, in input
  order; the comment under each line lists the code points it covers.
  The changed-category characters must still have their 6.0 classes
  (U+00A7 So, U+2308 Sm, U+FD3E Ps, U+FD3F Pe). Of the new-character
  probes only U+1F086 must be in its class; every later probe is matched
  with ~ and so must NOT be in the class it eventually receives. }
unicode-6.0 = -[So], -[Sm], -[Ps], -[Pe], -[Lo], -[Lo], s,
{A7 2308 FD3E, FD3F 13DD 1886 }
-[Lo], -[Po], -[Mn], s,
{10d0 166d 1734 }
-[So], -~[Ll], -~[Sc], -~[Ll], -~[Lu], s,
{1f086 2cf3 20ba 052b 13f5 }
-~[Lo], -~[Lo], -~[Ll], -~[Po], -~[So], s,
{1715c 0860 0560 0c77 32ff }
-~[Nd], -~[Po], -~[Po], -~[So], s.
{1fbf7 2e54 11b00 2ffc }

{ Unicode 6.1 profile. Differs from unicode-6.0 in two places: U+00A7
  must now be Po instead of So, and the 6.1 probe U+2CF3 must be Ll.
  Probes for later versions are still matched with ~. }
unicode-6.1 = -[Po], -[Sm], -[Ps], -[Pe], -[Lo], -[Lo], s,
{ A7 2308 FD3E , FD3F 13DD 1886 }
-[Lo], -[Po], -[Mn], s,
{10d0 166d 1734 }
-[So], -[Ll], -~[Sc], -~[Ll], -~[Lu], s,
{1f086 2cf3 20ba 052b 13f5 }
-~[Lo], -~[Lo], -~[Ll], -~[Po], -~[So], s,
{1715c 0860 0560 0c77 32ff }
-~[Nd], -~[Po], -~[Po], -~[So], s.
{1fbf7 2e54 11b00 2ffc }

{ Unicode 6.2 profile. Differs from unicode-6.1 in one place: the 6.2
  probe U+20BA must be Sc. Probes for later versions are still matched
  with ~. }
unicode-6.2 = -[Po], -[Sm], -[Ps], -[Pe], -[Lo], -[Lo], s,
{ A7 2308 FD3E , FD3F 13DD 1886 }
-[Lo], -[Po], -[Mn], s,
{10d0 166d 1734 }
-[So], -[Ll], -[Sc], -~[Ll], -~[Lu], s,
{1f086 2cf3 20ba 052b 13f5 }
-~[Lo], -~[Lo], -~[Ll], -~[Po], -~[So], s,
{1715c 0860 0560 0c77 32ff }
-~[Nd], -~[Po], -~[Po], -~[So], s.
{1fbf7 2e54 11b00 2ffc }

{ Unicode 6.3 profile. Differs from unicode-6.2 in one place: U+2308 must
  now be Ps instead of Sm. There is no new-character probe for 6.3, so
  this category change is the only thing separating 6.3 from 6.2. }
unicode-6.3 = -[Po], -[Ps], -[Ps], -[Pe], -[Lo], -[Lo], s,
{ A7 2308 FD3E , FD3F 13DD 1886 }
-[Lo], -[Po], -[Mn], s,
{10d0 166d 1734 }
-[So], -[Ll], -[Sc], -~[Ll], -~[Lu], s,
{1f086 2cf3 20ba 052b 13f5 }
-~[Lo], -~[Lo], -~[Ll], -~[Po], -~[So], s,
{1715c 0860 0560 0c77 32ff }
-~[Nd], -~[Po], -~[Po], -~[So], s.
{1fbf7 2e54 11b00 2ffc }

{ Unicode 7.0 profile. Differs from unicode-6.3 in two places: U+FD3E and
  U+FD3F swap classes (now Pe then Ps), and the 7.0 probe U+052B must be
  Ll. Probes for later versions are still matched with ~. }
unicode-7.0 = -[Po], -[Ps], -[Pe], -[Ps], -[Lo], -[Lo], s,
{ A7 2308 FD3E , FD3F 13DD 1886 }
-[Lo], -[Po], -[Mn], s,
{10d0 166d 1734 }
-[So], -[Ll], -[Sc], -[Ll], -~[Lu], s,
{1f086 2cf3 20ba 052b 13f5 }
-~[Lo], -~[Lo], -~[Ll], -~[Po], -~[So], s,
{1715c 0860 0560 0c77 32ff }
-~[Nd], -~[Po], -~[Po], -~[So], s.
{1fbf7 2e54 11b00 2ffc }

{ Unicode 8.0 profile. Differs from unicode-7.0 in two places: U+13DD
  must now be Lu instead of Lo, and the 8.0 probe U+13F5 must be Lu.
  Probes for later versions are still matched with ~. }
unicode-8.0 = -[Po], -[Ps], -[Pe], -[Ps], -[Lu], -[Lo], s,
{ A7 2308 FD3E , FD3F 13DD 1886 }
-[Lo], -[Po], -[Mn], s,
{10d0 166d 1734 }
-[So], -[Ll], -[Sc], -[Ll], -[Lu], s,
{1f086 2cf3 20ba 052b 13f5 }
-~[Lo], -~[Lo], -~[Ll], -~[Po], -~[So], s,
{1715c 0860 0560 0c77 32ff }
-~[Nd], -~[Po], -~[Po], -~[So], s.
{1fbf7 2e54 11b00 2ffc }

{ Unicode 9.0 profile. Differs from unicode-8.0 in two places: U+1886
  must now be Mn instead of Lo, and the 9.0 probe U+1715C must be Lo.
  Probes for later versions are still matched with ~. }
unicode-9.0 = -[Po], -[Ps], -[Pe], -[Ps], -[Lu], -[Mn], s,
{ A7 2308 FD3E , FD3F 13DD 1886 }
-[Lo], -[Po], -[Mn], s,
{10d0 166d 1734 }
-[So], -[Ll], -[Sc], -[Ll], -[Lu], s,
{1f086 2cf3 20ba 052b 13f5 }
-[Lo], -~[Lo], -~[Ll], -~[Po], -~[So], s,
{1715c 0860 0560 0c77 32ff }
-~[Nd], -~[Po], -~[Po], -~[So], s.
{1fbf7 2e54 11b00 2ffc }

{ Unicode 10.0 profile. Differs from unicode-9.0 in one place: the 10.0
  probe U+0860 must be Lo. Probes for later versions are still matched
  with ~. }
unicode-10.0 = -[Po], -[Ps], -[Pe], -[Ps], -[Lu], -[Mn], s,
{ A7 2308 FD3E , FD3F 13DD 1886 }
-[Lo], -[Po], -[Mn], s,
{10d0 166d 1734 }
-[So], -[Ll], -[Sc], -[Ll], -[Lu], s,
{1f086 2cf3 20ba 052b 13f5 }
-[Lo], -[Lo], -~[Ll], -~[Po], -~[So], s,
{1715c 0860 0560 0c77 32ff }
-~[Nd], -~[Po], -~[Po], -~[So], s.
{1fbf7 2e54 11b00 2ffc }

{ Unicode 11.0 profile. Differs from unicode-10.0 in two places: U+10D0
  must now be Ll instead of Lo, and the 11.0 probe U+0560 must be Ll.
  Probes for later versions are still matched with ~. }
unicode-11.0 = -[Po], -[Ps], -[Pe], -[Ps], -[Lu], -[Mn], s,
{ A7 2308 FD3E , FD3F 13DD 1886 }
-[Ll], -[Po], -[Mn], s,
{10d0 166d 1734 }
-[So], -[Ll], -[Sc], -[Ll], -[Lu], s,
{1f086 2cf3 20ba 052b 13f5 }
-[Lo], -[Lo], -[Ll], -~[Po], -~[So], s,
{1715c 0860 0560 0c77 32ff }
-~[Nd], -~[Po], -~[Po], -~[So], s.
{1fbf7 2e54 11b00 2ffc }

{ Unicode 12.0 profile. Differs from unicode-11.0 in two places: U+166D
  must now be So instead of Po, and the 12.0 probe U+0C77 must be Po.
  Probes for later versions are still matched with ~. }
unicode-12.0 = -[Po], -[Ps], -[Pe], -[Ps], -[Lu], -[Mn], s,
{ A7 2308 FD3E , FD3F 13DD 1886 }
-[Ll], -[So], -[Mn], s,
{10d0 166d 1734 }
-[So], -[Ll], -[Sc], -[Ll], -[Lu], s,
{1f086 2cf3 20ba 052b 13f5 }
-[Lo], -[Lo], -[Ll], -[Po], -~[So], s,
{1715c 0860 0560 0c77 32ff }
-~[Nd], -~[Po], -~[Po], -~[So], s.
{1fbf7 2e54 11b00 2ffc }

{ Unicode 12.1 profile. Differs from unicode-12.0 in one place: the 12.1
  probe U+32FF must be So. Probes for later versions are still matched
  with ~. }
unicode-12.1 = -[Po], -[Ps], -[Pe], -[Ps], -[Lu], -[Mn], s,
{ A7 2308 FD3E , FD3F 13DD 1886 }
-[Ll], -[So], -[Mn], s,
{10d0 166d 1734 }
-[So], -[Ll], -[Sc], -[Ll], -[Lu], s,
{1f086 2cf3 20ba 052b 13f5 }
-[Lo], -[Lo], -[Ll], -[Po], -[So], s,
{1715c 0860 0560 0c77 32ff }
-~[Nd], -~[Po], -~[Po], -~[So], s.
{1fbf7 2e54 11b00 2ffc }

{ Unicode 13.0 profile. Differs from unicode-12.1 in one place: the 13.0
  probe U+1FBF7 must be Nd. Probes for later versions are still matched
  with ~. }
unicode-13.0 = -[Po], -[Ps], -[Pe], -[Ps], -[Lu], -[Mn], s,
{ A7 2308 FD3E , FD3F 13DD 1886 }
-[Ll], -[So], -[Mn], s,
{10d0 166d 1734 }
-[So], -[Ll], -[Sc], -[Ll], -[Lu], s,
{1f086 2cf3 20ba 052b 13f5 }
-[Lo], -[Lo], -[Ll], -[Po], -[So], s,
{1715c 0860 0560 0c77 32ff }
-[Nd], -~[Po], -~[Po], -~[So], s.
{1fbf7 2e54 11b00 2ffc }

{ Unicode 14.0 profile. Differs from unicode-13.0 in two places: U+1734
  must now be Mc instead of Mn, and the 14.0 probe U+2E54 must be Po.
  Probes for later versions are still matched with ~. }
unicode-14.0 = -[Po], -[Ps], -[Pe], -[Ps], -[Lu], -[Mn], s,
{ A7 2308 FD3E , FD3F 13DD 1886 }
-[Ll], -[So], -[Mc], s,
{10d0 166d 1734 }
-[So], -[Ll], -[Sc], -[Ll], -[Lu], s,
{1f086 2cf3 20ba 052b 13f5 }
-[Lo], -[Lo], -[Ll], -[Po], -[So], s,
{1715c 0860 0560 0c77 32ff }
-[Nd], -[Po], -~[Po], -~[So], s.
{1fbf7 2e54 11b00 2ffc }

{ Unicode 15.0 profile. Differs from unicode-14.0 in one place: the 15.0
  probe U+11B00 must be Po. Only the 15.1 probe U+2FFC is still matched
  with ~. }
unicode-15.0 = -[Po], -[Ps], -[Pe], -[Ps], -[Lu], -[Mn], s,
{ A7 2308 FD3E , FD3F 13DD 1886 }
-[Ll], -[So], -[Mc], s,
{10d0 166d 1734 }
-[So], -[Ll], -[Sc], -[Ll], -[Lu], s,
{1f086 2cf3 20ba 052b 13f5 }
-[Lo], -[Lo], -[Ll], -[Po], -[So], s,
{1715c 0860 0560 0c77 32ff }
-[Nd], -[Po], -[Po], -~[So], s.
{1fbf7 2e54 11b00 2ffc }

{ Unicode 15.1 profile. Differs from unicode-15.0 in one place: the 15.1
  probe U+2FFC must be So. No terminal in this rule uses ~, so every
  probe character must be in its listed class. }
unicode-15.1 = -[Po], -[Ps], -[Pe], -[Ps], -[Lu], -[Mn], s,
{ A7 2308 FD3E , FD3F 13DD 1886 }
-[Ll], -[So], -[Mc], s,
{10d0 166d 1734 }
-[So], -[Ll], -[Sc], -[Ll], -[Lu], s,
{1f086 2cf3 20ba 052b 13f5 }
-[Lo], -[Lo], -[Ll], -[Po], -[So], s,
{1715c 0860 0560 0c77 32ff }
-[Nd], -[Po], -[Po], -[So], s.
{1fbf7 2e54 11b00 2ffc }


{ Optional separator: zero or more characters that are in class Zs or are
  tab (#9), line feed (#A) or carriage return (#D). Both the rule and the
  characters it matches are marked with - so neither shows up in the
  output. }
-s = -[Zs; #9; #A; #D]*.
5 changes: 5 additions & 0 deletions tests/correct/unicode-version-diagnostic.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
§⌈﴾﴿Ꮭᢆ
ა᙭᜴
🂆ⳳ₺ԫᏵ
𗅜ࡠՠ౷㋿
🯷⹔𑬀⿼
2 changes: 2 additions & 0 deletions tests/correct/unicode.v10.0.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<?xml version="1.0" encoding="utf-8"?>
<unicode-10.0/>
2 changes: 2 additions & 0 deletions tests/correct/unicode.v11.0.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<?xml version="1.0" encoding="utf-8"?>
<unicode-11.0/>
2 changes: 2 additions & 0 deletions tests/correct/unicode.v12.0.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<?xml version="1.0" encoding="utf-8"?>
<unicode-12.0/>
2 changes: 2 additions & 0 deletions tests/correct/unicode.v12.1.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<?xml version="1.0" encoding="utf-8"?>
<unicode-12.1/>
2 changes: 2 additions & 0 deletions tests/correct/unicode.v13.0.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<?xml version="1.0" encoding="utf-8"?>
<unicode-13.0/>
2 changes: 2 additions & 0 deletions tests/correct/unicode.v14.0.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<?xml version="1.0" encoding="utf-8"?>
<unicode-14.0/>
2 changes: 2 additions & 0 deletions tests/correct/unicode.v15.0.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<?xml version="1.0" encoding="utf-8"?>
<unicode-15.0/>
2 changes: 2 additions & 0 deletions tests/correct/unicode.v15.1.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<?xml version="1.0" encoding="utf-8"?>
<unicode-15.1/>
Loading
Loading