From 810f8d8dd0ef3446d3f781f64dfe85bb0b79ff42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Janosch=20Mu=CC=88ller?= Date: Sat, 10 Jun 2023 13:58:14 +0200 Subject: [PATCH] Add some missing unicode properties --- CHANGELOG.md | 5 +++++ Gemfile | 2 +- lib/regexp_parser/scanner/properties/long.csv | 11 +++++++++++ lib/regexp_parser/scanner/properties/short.csv | 2 ++ .../syntax/token/unicode_property.rb | 15 ++++++++++++++- 5 files changed, 33 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5bee475..7836c1f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed + +- support for extpict unicode property, added in Ruby 2.6 +- support for 10 unicode script/block properties added in Ruby 3.2 + ## [2.8.0] - 2023-04-17 - [Janosch Müller](mailto:janosch84@gmail.com) ### Added diff --git a/Gemfile b/Gemfile index e8472ce..f4fc4c3 100644 --- a/Gemfile +++ b/Gemfile @@ -5,7 +5,7 @@ gemspec group :development, :test do gem 'leto', '~> 2.0' gem 'rake', '~> 13.0' - gem 'regexp_property_values', '~> 1.3' + gem 'regexp_property_values', '~> 1.4' gem 'rspec', '~> 3.10' if RUBY_VERSION.to_f >= 2.7 gem 'benchmark-ips', '~> 2.1' diff --git a/lib/regexp_parser/scanner/properties/long.csv b/lib/regexp_parser/scanner/properties/long.csv index ca0ba24..7e644ae 100644 --- a/lib/regexp_parser/scanner/properties/long.csv +++ b/lib/regexp_parser/scanner/properties/long.csv @@ -7,6 +7,7 @@ age=12.0,age=12.0 age=12.1,age=12.1 age=13.0,age=13.0 age=14.0,age=14.0 +age=15.0,age=15.0 age=2.0,age=2.0 age=2.1,age=2.1 age=3.0,age=3.0 @@ -97,6 +98,7 @@ emojimodifierbase,emoji_modifier_base emojipresentation,emoji_presentation enclosingmark,enclosing_mark ethiopic,ethiopic +extendedpictographic,extended_pictographic extender,extender finalpunctuation,final_punctuation format,format @@ -139,6 +141,7 @@ inancientsymbols,in_ancient_symbols inarabic,in_arabic inarabicextendeda,in_arabic_extended_a inarabicextendedb,in_arabic_extended_b +inarabicextendedc,in_arabic_extended_c inarabicmathematicalalphabeticsymbols,in_arabic_mathematical_alphabetic_symbols inarabicpresentationformsa,in_arabic_presentation_forms_a inarabicpresentationformsb,in_arabic_presentation_forms_b @@ -186,6 +189,7 @@ incjkunifiedideographsextensiond,in_cjk_unified_ideographs_extension_d incjkunifiedideographsextensione,in_cjk_unified_ideographs_extension_e incjkunifiedideographsextensionf,in_cjk_unified_ideographs_extension_f incjkunifiedideographsextensiong,in_cjk_unified_ideographs_extension_g +incjkunifiedideographsextensionh,in_cjk_unified_ideographs_extension_h incombiningdiacriticalmarks,in_combining_diacritical_marks incombiningdiacriticalmarksextended,in_combining_diacritical_marks_extended incombiningdiacriticalmarksforsymbols,in_combining_diacritical_marks_for_symbols @@ -205,10 +209,12 @@ incyrillic,in_cyrillic incyrillicextendeda,in_cyrillic_extended_a incyrillicextendedb,in_cyrillic_extended_b incyrillicextendedc,in_cyrillic_extended_c +incyrillicextendedd,in_cyrillic_extended_d incyrillicsupplement,in_cyrillic_supplement indeseret,in_deseret indevanagari,in_devanagari indevanagariextended,in_devanagari_extended +indevanagariextendeda,in_devanagari_extended_a indingbats,in_dingbats indivesakuru,in_dives_akuru indogra,in_dogra @@ -268,6 +274,7 @@ inipaextensions,in_ipa_extensions initialpunctuation,initial_punctuation injavanese,in_javanese inkaithi,in_kaithi +inkaktoviknumerals,in_kaktovik_numerals inkanaextendeda,in_kana_extended_a inkanaextendedb,in_kana_extended_b inkanasupplement,in_kana_supplement @@ -276,6 +283,7 @@ inkangxiradicals,in_kangxi_radicals inkannada,in_kannada inkatakana,in_katakana inkatakanaphoneticextensions,in_katakana_phonetic_extensions +inkawi,in_kawi inkayahli,in_kayah_li inkharoshthi,in_kharoshthi inkhitansmallscript,in_khitan_small_script @@ -339,6 +347,7 @@ inmyanmar,in_myanmar inmyanmarextendeda,in_myanmar_extended_a inmyanmarextendedb,in_myanmar_extended_b innabataean,in_nabataean +innagmundari,in_nag_mundari innandinagari,in_nandinagari innewa,in_newa innewtailue,in_new_tai_lue @@ -457,6 +466,7 @@ joincontrol,join_control kaithi,kaithi kannada,kannada katakana,katakana +kawi,kawi kayahli,kayah_li kharoshthi,kharoshthi khitansmallscript,khitan_small_script @@ -503,6 +513,7 @@ mro,mro multani,multani myanmar,myanmar nabataean,nabataean +nagmundari,nag_mundari nandinagari,nandinagari newa,newa newline,newline diff --git a/lib/regexp_parser/scanner/properties/short.csv b/lib/regexp_parser/scanner/properties/short.csv index 675ac8d..05e0f74 100644 --- a/lib/regexp_parser/scanner/properties/short.csv +++ b/lib/regexp_parser/scanner/properties/short.csv @@ -57,6 +57,7 @@ emod,emoji_modifier epres,emoji_presentation ethi,ethiopic ext,extender +extpict,extended_pictographic geor,georgian glag,glagolitic gong,gunjala_gondi @@ -133,6 +134,7 @@ mtei,meetei_mayek mult,multani mymr,myanmar n,number +nagm,nag_mundari nand,nandinagari narb,old_north_arabian nbat,nabataean diff --git a/lib/regexp_parser/syntax/token/unicode_property.rb b/lib/regexp_parser/syntax/token/unicode_property.rb index 4fc62b0..09f7cb1 100644 --- a/lib/regexp_parser/syntax/token/unicode_property.rb +++ b/lib/regexp_parser/syntax/token/unicode_property.rb @@ -59,7 +59,7 @@ module Category Age_V3_1_0 = %i[age=13.0] - Age_V3_2_0 = %i[age=14.0] + Age_V3_2_0 = %i[age=14.0 age=15.0] Age = all[:Age_V] @@ -321,6 +321,8 @@ module Category Script_V3_2_0 = %i[ cypro_minoan + kawi + nag_mundari old_uyghur tangsa toto @@ -667,11 +669,18 @@ module Category UnicodeBlock_V3_2_0 = %i[ in_arabic_extended_b + in_arabic_extended_c + in_cjk_unified_ideographs_extension_h in_cypro_minoan + in_cyrillic_extended_d + in_devanagari_extended_a in_ethiopic_extended_b + in_kaktovik_numerals in_kana_extended_b + in_kawi in_latin_extended_f in_latin_extended_g + in_nag_mundari in_old_uyghur in_tangsa in_toto @@ -690,6 +699,10 @@ module Category emoji_presentation ] + Emoji_V2_6_0 = %i[ + extended_pictographic + ] + Emoji = all[:Emoji_V] V1_9_0 = Category::All + POSIX + all[:V1_9_0]