diff --git a/clj/project.clj b/clj/project.clj index 96512b0..bbc042a 100644 --- a/clj/project.clj +++ b/clj/project.clj @@ -4,4 +4,5 @@ :license {:name "Vim License" :url "http://vimdoc.sourceforge.net/htmldoc/uganda.html#license" :comments ":help license"} - :dependencies [[org.clojure/clojure "1.5.1"]]) + :dependencies [[org.clojure/clojure "1.5.1"] + [frak "0.1.3"]]) diff --git a/clj/src/vim_clojure_static/generate.clj b/clj/src/vim_clojure_static/generate.clj index 565218b..5dc9fcd 100644 --- a/clj/src/vim_clojure_static/generate.clj +++ b/clj/src/vim_clojure_static/generate.clj @@ -3,18 +3,26 @@ (ns vim-clojure-static.generate (:require [clojure.string :as string] - [clojure.set :as set])) + [clojure.set :as set] + [frak])) ;; ;; Helpers ;; +(defn vim-frak-pattern + "Create a non-capturing regular expression pattern compatible with Vim." + [strs] + (-> (str (frak/pattern strs)) + (string/replace #"\(\?:" "\\%\\("))) + (defn property-pattern "Vimscript very magic pattern for a character property class." ([s] (property-pattern s true)) - ([s braces?] (if braces? - (format "\\v\\\\[pP]\\{%s\\}" s) - (format "\\v\\\\[pP]%s" s)))) + ([s braces?] + (if braces? + (format "\\v\\\\[pP]\\{%s\\}" s) + (format "\\v\\\\[pP]%s" s)))) (defn syntax-match-properties "Vimscript literal `syntax match` for a character property class." @@ -22,7 +30,7 @@ ([group fmt props braces?] (format "syntax match %s \"%s\" contained display\n" (name group) - (property-pattern (format fmt (string/join \| (sort props))) braces?)))) + (property-pattern (format fmt (vim-frak-pattern props)) braces?)))) (defn get-private-field "Violate encapsulation and get the value of a private field." @@ -141,7 +149,7 @@ ;; `IsPosix` works, but is undefined. (syntax-match-properties :clojureRegexpPosixCharClass - "%%(%s)" + "%s" (:posix character-properties))) (def vim-java-char-classes @@ -149,7 +157,7 @@ ;; `IsjavaMethod` works, but is undefined. 
(syntax-match-properties :clojureRegexpJavaCharClass - "java%%(%s)" + "java%s" (map #(string/replace % #"\Ajava" "") (:java character-properties)))) (def vim-unicode-binary-char-classes @@ -158,26 +166,32 @@ ;; insensitively like the other Unicode properties. (syntax-match-properties :clojureRegexpUnicodeCharClass - "\\cIs%%(%s)" + "\\cIs%s" (map string/lower-case (:binary character-properties)))) (def vim-unicode-category-char-classes "Vimscript literal `syntax match` for Unicode General Category classes." - (let [cats (map seq (:category character-properties)) - cats (map (fn [[c subcats]] - (format "%s[%s]" c (apply str (sort (mapcat rest subcats))))) - (group-by first cats))] + (let [cats (sort (:category character-properties)) + chrs (->> (map seq cats) + (group-by first) + (keys) + (map str) + (sort))] ;; gc= and general_category= can be case insensitive, but this is behavior ;; is undefined. (str (syntax-match-properties :clojureRegexpUnicodeCharClass - "%%(%s)" - (sort (filter #(= (count %) 1) (:category character-properties))) + "%s" + chrs false) (syntax-match-properties :clojureRegexpUnicodeCharClass - "%%(Is|gc\\=|general_category\\=)?%%(%s)" + "%s" + cats) + (syntax-match-properties + :clojureRegexpUnicodeCharClass + "%%(Is|gc\\=|general_category\\=)?%s" cats)))) (def vim-unicode-script-char-classes @@ -189,7 +203,7 @@ ;; InScriptName works, but is undefined. (syntax-match-properties :clojureRegexpUnicodeCharClass - "\\c%%(Is|sc\\=|script\\=)%%(%s)" + "\\c%%(Is|sc\\=|script\\=)%s" (map string/lower-case (:script character-properties)))) (def vim-unicode-block-char-classes @@ -198,10 +212,26 @@ ;; of Is. 
(syntax-match-properties :clojureRegexpUnicodeCharClass - "\\c%%(In|blk\\=|block\\=)%%(%s)" + "\\c%%(In|blk\\=|block\\=)%s" (map string/lower-case (:block character-properties)))) +(def comprehensive-clojure-character-property-regexps + "A string representing a Clojure literal vector of regular expressions + containing all possible property character classes. For testing Vimscript + syntax matching optimizations." + (let [fmt (fn [prefix prop-key] + (let [props (map (partial format "\\p{%s%s}" prefix) + (sort (get character-properties prop-key)))] + (format "#\"%s\"" (string/join props))))] + (string/join \newline [(fmt "" :posix) + (fmt "" :java) + (fmt "Is" :binary) + (fmt "general_category=" :category) + (fmt "script=" :script) + (fmt "block=" :block)]))) + (comment + ;; Generate the vim literal definitions for pasting into the runtime files. (spit "tmp/clojure-defs.vim" (str generation-comment clojure-version-comment @@ -218,4 +248,8 @@ vim-unicode-binary-char-classes vim-unicode-category-char-classes vim-unicode-script-char-classes - vim-unicode-block-char-classes))) + vim-unicode-block-char-classes)) + ;; Generate an example file with all possible character property literals. 
+ (spit "tmp/all-char-props.clj" + comprehensive-clojure-character-property-regexps)) + diff --git a/clj/src/vim_clojure_static/test.clj b/clj/src/vim_clojure_static/test.clj index b4ac94f..a8511de 100644 --- a/clj/src/vim_clojure_static/test.clj +++ b/clj/src/vim_clojure_static/test.clj @@ -17,7 +17,7 @@ [file & lines] (io/make-parents file) (spit file (string/join \newline lines)) - (shell/sh "vim" "-u" "NONE" "-N" "-S" "vim/syn-id-names.vim" file) + (shell/sh "vim" "-u" "NONE" "-N" "-S" "vim/test-runtime.vim" file) ;; The last line of the file will contain valid EDN (into {} (map (fn [l ids] [l (mapv keyword ids)]) lines @@ -69,6 +69,42 @@ ss λs))) contexts))))) +(defn vim-nfa-dump + "Run a patched version of Vim compiled with -DDEBUG on a new file containing + buffer, then move the NFA log to log-path. The patch is located at + vim/custom-nfa-log.patch" + [vim-path buffer log-path] + (let [file "tmp/nfa-test-file.clj"] + (spit file buffer) + (time (shell/sh vim-path "-u" "NONE" "-N" "-S" "vim/test-runtime.vim" file)) + (shell/sh "mv" "nfa_regexp.log" log-path))) + +(defn compare-nfa-dumps + "Dump NFA logs with given buffer and syntax-files; log-files are written to + tmp/ and are distinguished by the hash of the buffer and syntax script. + + The vim-path passed to vim-nfa-dump should either be in the VIMDEBUG + environment variable, or be the top vim in your PATH. + + Returns the line count of each corresponding log file." 
+ [buf [& syntax-files] & opts] + (let [{:keys [vim-path] + :or {vim-path (or (System/getenv "VIMDEBUG") "vim")}} opts + syn-path "../syntax/clojure.vim" + orig-syn (slurp syn-path) + buf-hash (hash buf)] + (try + (mapv (fn [path] + (let [syn-buf (slurp path) + syn-hash (hash syn-buf) + log-path (format "tmp/debug:%d:%d.log" buf-hash syn-hash)] + (spit syn-path syn-buf) + (vim-nfa-dump vim-path buf log-path) + (count (re-seq #"\n" (slurp log-path))))) + syntax-files) + (finally + (spit syn-path orig-syn))))) + (comment (macroexpand-1 @@ -80,4 +116,12 @@ ["^" #(= % [:clojureRegexpBoundary])]])) (test #'number-literals-test) + (defn dump! [buf] + (compare-nfa-dumps (format "#\"\\p{%s}\"\n" buf) + ["../syntax/clojure.vim" "tmp/altsyntax.vim"])) + + (dump! "Ll") + (dump! "javaLowercase") + (dump! "block=UNIFIED CANADIAN ABORIGINAL SYLLABICS") + ) diff --git a/clj/test/vim_clojure_static/syntax_test.clj b/clj/test/vim_clojure_static/syntax_test.clj index da31cf6..5feb5c5 100644 --- a/clj/test/vim_clojure_static/syntax_test.clj +++ b/clj/test/vim_clojure_static/syntax_test.clj @@ -92,6 +92,28 @@ (comment (test #'number-literals-test)) +;; TODO: Finish me! 
(this was in an old git stash) +;; (defsyntaxtest keywords-test +;; (with-format "%s" +;; ":1" kw +;; ":A" kw +;; ":a" kw +;; ":αβγ" kw +;; "::a" kw +;; ":a/b" kw +;; ":a:b" kw +;; ":a:b/:c:b" kw +;; ":a/b/c/d" kw +;; "::a/b" !kw +;; "::" !kw +;; ":a:" !kw +;; ":a/" !kw +;; ":/" !kw +;; ":" !kw +;; )) +;; +;; (comment (test #'keywords-test)) + (defsyntaxtest java-regexp-literals-test ["#\"%s\"" [;; http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html @@ -221,8 +243,6 @@ "\\p{IsLatin}" regexp-unicode-char-class ;; \p{InGreek} A character in the Greek block (block) "\\p{InGreek}" regexp-unicode-char-class - ;; \p{Lu} An uppercase letter (category) - "\\p{Lu}" regexp-unicode-char-class ;; \p{IsAlphabetic} An alphabetic character (binary property) "\\p{IsAlphabetic}" regexp-unicode-char-class ;; \p{Sc} A currency symbol @@ -231,6 +251,13 @@ "\\P{InGreek}" regexp-unicode-char-class ;; [\p{L}&&[^\p{Lu}]] Any letter except an uppercase letter (subtraction) + ;; Abbreviated categories + "\\pL" regexp-unicode-char-class + "\\p{L}" regexp-unicode-char-class + "\\p{Lu}" regexp-unicode-char-class + "\\p{gc=L}" regexp-unicode-char-class + "\\p{IsLu}" regexp-unicode-char-class + ;;;; Invalid classes "\\P{Xzibit}" !regexp-posix-char-class diff --git a/clj/vim/custom-nfa-log.patch b/clj/vim/custom-nfa-log.patch new file mode 100644 index 0000000..00096f8 --- /dev/null +++ b/clj/vim/custom-nfa-log.patch @@ -0,0 +1,75 @@ +commit 09b1321fd7fa91e5a8e36ebc2d858079969adca4 (HEAD, github/custom-nfa-log, custom-nfa-log) +Author: guns +Date: Thu, 1 Aug 2013 10:56:19 -0500 + + Add custom NFA logging to nfa_regexp.log + + The goal is to get a ballpark estimate of the number of steps the regexp + engine undertakes in a session. 
One line of logging is done: + + * Start of nfa_regmatch() + * For each character considered + * For each state of a character considered + + Vim should be compiled with -DDEBUG; if the NFA engine state graphs are + desired, then also use -DINCLUDE_NFA_DUMP. + + This branch can also be found at: + + https://github.com/guns/vim/tree/custom-nfa-log +--- + src/regexp_nfa.c | 20 +++++++++++++++++--- + 1 file changed, 17 insertions(+), 3 deletions(-) + +diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c +index 738ac3b..bea9d50 100644 +--- a/src/regexp_nfa.c ++++ b/src/regexp_nfa.c +@@ -24,9 +24,18 @@ + #ifdef DEBUG + # define NFA_REGEXP_ERROR_LOG "nfa_regexp_error.log" + # define ENABLE_LOG +-# define NFA_REGEXP_DUMP_LOG "nfa_regexp_dump.log" +-# define NFA_REGEXP_RUN_LOG "nfa_regexp_run.log" +-# define NFA_REGEXP_DEBUG_LOG "nfa_regexp_debug.log" ++# ifdef INCLUDE_NFA_DUMP ++# define NFA_REGEXP_DUMP_LOG "nfa_regexp.log" ++# else ++# define NFA_REGEXP_DUMP_LOG "/dev/null" ++# endif ++# define NFA_REGEXP_RUN_LOG "/dev/null" ++# define NFA_REGEXP_DEBUG_LOG "/dev/null" ++# define LOG(fmt, ...) do { \ ++ FILE *log = fopen("nfa_regexp.log", "a"); \ ++ fprintf(log, fmt, __VA_ARGS__); \ ++ fclose(log); \ ++} while (0) + #endif + + enum +@@ -5043,6 +5052,8 @@ nfa_regmatch(prog, start, submatch, m) + goto theend; + + #ifdef ENABLE_LOG ++ LOG("START nfa_regmatch: alloc=%d pattern=\"%s\"\n", size*2, prog->pattern); ++ + log_fd = fopen(NFA_REGEXP_RUN_LOG, "a"); + if (log_fd != NULL) + { +@@ -5129,6 +5140,8 @@ nfa_regmatch(prog, start, submatch, m) + nextlist->id = nfa_listid + 1; + + #ifdef ENABLE_LOG ++ LOG("# states=%d reginput=\"%s\"\n", thislist->n, reginput); ++ + fprintf(log_fd, "------------------------------------------\n"); + fprintf(log_fd, ">>> Reginput is \"%s\"\n", reginput); + fprintf(log_fd, ">>> Advanced one character ... 
Current char is %c (code %d) \n", curc, (int)curc); +@@ -5161,6 +5174,7 @@ nfa_regmatch(prog, start, submatch, m) + fprintf(debug, "%s, ", code); + #endif + #ifdef ENABLE_LOG ++ LOG("## computing nextlist: code=\"%s\"\n", code); + { + int col; + diff --git a/clj/vim/syn-id-names.vim b/clj/vim/test-runtime.vim similarity index 65% rename from clj/vim/syn-id-names.vim rename to clj/vim/test-runtime.vim index 6b13b98..8dfd714 100644 --- a/clj/vim/syn-id-names.vim +++ b/clj/vim/test-runtime.vim @@ -3,16 +3,23 @@ execute 'set rtp=' . expand('%:p:h:h:h') . ',$VIMRUNTIME' filetype plugin on syntax on +set synmaxcol=0 setfiletype clojure -function! s:append_syn_id_names() +if !exists('g:testing') + let g:testing = 1 +endif + +function! s:syn_id_names() let names = [] for lnum in range(1, line('$')) let f = 'synIDattr(synID(' . lnum . ', v:val, 0), "name")' call add(names, map(range(1, virtcol([lnum, '$']) - 1), f)) endfor - " Changing the quotes will make this valid EDN - call append(line('$'), tr(string(names), "'", '"')) + return names endfunction -call s:append_syn_id_names() | write | quitall! +if g:testing + " Changing the quotes will make this valid EDN + call append(line('$'), tr(string(s:syn_id_names()), "'", '"')) | write | quitall! 
+endif diff --git a/indent/clojure.vim b/indent/clojure.vim index fd76500..3609768 100644 --- a/indent/clojure.vim +++ b/indent/clojure.vim @@ -8,6 +8,11 @@ " License: Same as Vim " Last Change: 30 January 2013 +" TODO: Indenting after multibyte characters is broken: +" (let [Δ (if foo +" bar ; Indent error +" baz)]) + if exists("b:did_indent") finish endif diff --git a/syntax/clojure.vim b/syntax/clojure.vim index a773abd..95a3b64 100644 --- a/syntax/clojure.vim +++ b/syntax/clojure.vim @@ -82,13 +82,14 @@ syntax region clojureRegexpQuote start=/\\Q/ skip=/\\\\\|\\"/ end=/\\E/ " Character property classes " Generated from https://github.com/guns/vim-clojure-static/blob/vim-release-004/clj/src/vim_clojure_static/generate.clj " Java version 1.7.0_17 -syntax match clojureRegexpPosixCharClass "\v\\[pP]\{%(ASCII|Alnum|Alpha|Blank|Cntrl|Digit|Graph|Lower|Print|Punct|Space|Upper|XDigit)\}" contained display -syntax match clojureRegexpJavaCharClass "\v\\[pP]\{java%(Alphabetic|Defined|Digit|ISOControl|IdentifierIgnorable|Ideographic|JavaIdentifierPart|JavaIdentifierStart|Letter|LetterOrDigit|LowerCase|Mirrored|SpaceChar|TitleCase|UnicodeIdentifierPart|UnicodeIdentifierStart|UpperCase|Whitespace)\}" contained display -syntax match clojureRegexpUnicodeCharClass "\v\\[pP]\{\cIs%(alnum|alphabetic|assigned|blank|control|digit|graph|hex_digit|hexdigit|ideographic|letter|lowercase|noncharacter_code_point|noncharactercodepoint|print|punctuation|titlecase|uppercase|white_space|whitespace|word)\}" contained display -syntax match clojureRegexpUnicodeCharClass "\v\\[pP]%(C|L|M|N|P|S|Z)" contained display -syntax match clojureRegexpUnicodeCharClass "\v\\[pP]\{%(Is|gc\=|general_category\=)?%(C[cfnos]|L[CDlmotu]|M[cen]|N[dlo]|P[cdefios]|S[ckmo]|Z[lps])\}" contained display -syntax match clojureRegexpUnicodeCharClass 
"\v\\[pP]\{\c%(Is|sc\=|script\=)%(arab|arabic|armenian|armi|armn|avestan|avst|bali|balinese|bamu|bamum|batak|batk|beng|bengali|bopo|bopomofo|brah|brahmi|brai|braille|bugi|buginese|buhd|buhid|canadian_aboriginal|cans|cari|carian|cham|cher|cherokee|common|copt|coptic|cprt|cuneiform|cypriot|cyrillic|cyrl|deseret|deva|devanagari|dsrt|egyp|egyptian_hieroglyphs|ethi|ethiopic|geor|georgian|glag|glagolitic|goth|gothic|greek|grek|gujarati|gujr|gurmukhi|guru|han|hang|hangul|hani|hano|hanunoo|hebr|hebrew|hira|hiragana|imperial_aramaic|inherited|inscriptional_pahlavi|inscriptional_parthian|ital|java|javanese|kaithi|kali|kana|kannada|katakana|kayah_li|khar|kharoshthi|khmer|khmr|knda|kthi|lana|lao|laoo|latin|latn|lepc|lepcha|limb|limbu|linb|linear_b|lisu|lyci|lycian|lydi|lydian|malayalam|mand|mandaic|meetei_mayek|mlym|mong|mongolian|mtei|myanmar|mymr|new_tai_lue|nko|nkoo|ogam|ogham|ol_chiki|olck|old_italic|old_persian|old_south_arabian|old_turkic|oriya|orkh|orya|osma|osmanya|phag|phags_pa|phli|phnx|phoenician|prti|rejang|rjng|runic|runr|samaritan|samr|sarb|saur|saurashtra|shavian|shaw|sinh|sinhala|sund|sundanese|sylo|syloti_nagri|syrc|syriac|tagalog|tagb|tagbanwa|tai_le|tai_tham|tai_viet|tale|talu|tamil|taml|tavt|telu|telugu|tfng|tglg|thaa|thaana|thai|tibetan|tibt|tifinagh|ugar|ugaritic|unknown|vai|vaii|xpeo|xsux|yi|yiii|zinh|zyyy|zzzz)\}" contained display -syntax match clojureRegexpUnicodeCharClass "\v\\[pP]\{\c%(In|blk\=|block\=)%(aegean numbers|aegean_numbers|aegeannumbers|alchemical symbols|alchemical_symbols|alchemicalsymbols|alphabetic presentation forms|alphabetic_presentation_forms|alphabeticpresentationforms|ancient greek musical notation|ancient greek numbers|ancient symbols|ancient_greek_musical_notation|ancient_greek_numbers|ancient_symbols|ancientgreekmusicalnotation|ancientgreeknumbers|ancientsymbols|arabic|arabic presentation forms-a|arabic presentation forms-b|arabic 
supplement|arabic_presentation_forms_a|arabic_presentation_forms_b|arabic_supplement|arabicpresentationforms-a|arabicpresentationforms-b|arabicsupplement|armenian|arrows|avestan|balinese|bamum|bamum supplement|bamum_supplement|bamumsupplement|basic latin|basic_latin|basiclatin|batak|bengali|block elements|block_elements|blockelements|bopomofo|bopomofo extended|bopomofo_extended|bopomofoextended|box drawing|box_drawing|boxdrawing|brahmi|braille patterns|braille_patterns|braillepatterns|buginese|buhid|byzantine musical symbols|byzantine_musical_symbols|byzantinemusicalsymbols|carian|cham|cherokee|cjk compatibility|cjk compatibility forms|cjk compatibility ideographs|cjk compatibility ideographs supplement|cjk radicals supplement|cjk strokes|cjk symbols and punctuation|cjk unified ideographs|cjk unified ideographs extension a|cjk unified ideographs extension b|cjk unified ideographs extension c|cjk unified ideographs extension d|cjk_compatibility|cjk_compatibility_forms|cjk_compatibility_ideographs|cjk_compatibility_ideographs_supplement|cjk_radicals_supplement|cjk_strokes|cjk_symbols_and_punctuation|cjk_unified_ideographs|cjk_unified_ideographs_extension_a|cjk_unified_ideographs_extension_b|cjk_unified_ideographs_extension_c|cjk_unified_ideographs_extension_d|cjkcompatibility|cjkcompatibilityforms|cjkcompatibilityideographs|cjkcompatibilityideographssupplement|cjkradicalssupplement|cjkstrokes|cjksymbolsandpunctuation|cjkunifiedideographs|cjkunifiedideographsextensiona|cjkunifiedideographsextensionb|cjkunifiedideographsextensionc|cjkunifiedideographsextensiond|combining diacritical marks|combining diacritical marks for symbols|combining diacritical marks supplement|combining half marks|combining marks for 
symbols|combining_diacritical_marks|combining_diacritical_marks_supplement|combining_half_marks|combining_marks_for_symbols|combiningdiacriticalmarks|combiningdiacriticalmarksforsymbols|combiningdiacriticalmarkssupplement|combininghalfmarks|combiningmarksforsymbols|common indic number forms|common_indic_number_forms|commonindicnumberforms|control pictures|control_pictures|controlpictures|coptic|counting rod numerals|counting_rod_numerals|countingrodnumerals|cuneiform|cuneiform numbers and punctuation|cuneiform_numbers_and_punctuation|cuneiformnumbersandpunctuation|currency symbols|currency_symbols|currencysymbols|cypriot syllabary|cypriot_syllabary|cypriotsyllabary|cyrillic|cyrillic extended-a|cyrillic extended-b|cyrillic supplement|cyrillic supplementary|cyrillic_extended_a|cyrillic_extended_b|cyrillic_supplementary|cyrillicextended-a|cyrillicextended-b|cyrillicsupplement|cyrillicsupplementary|deseret|devanagari|devanagari extended|devanagari_extended|devanagariextended|dingbats|domino tiles|domino_tiles|dominotiles|egyptian hieroglyphs|egyptian_hieroglyphs|egyptianhieroglyphs|emoticons|enclosed alphanumeric supplement|enclosed alphanumerics|enclosed cjk letters and months|enclosed ideographic supplement|enclosed_alphanumeric_supplement|enclosed_alphanumerics|enclosed_cjk_letters_and_months|enclosed_ideographic_supplement|enclosedalphanumerics|enclosedalphanumericsupplement|enclosedcjklettersandmonths|enclosedideographicsupplement|ethiopic|ethiopic extended|ethiopic extended-a|ethiopic supplement|ethiopic_extended|ethiopic_extended_a|ethiopic_supplement|ethiopicextended|ethiopicextended-a|ethiopicsupplement|general punctuation|general_punctuation|generalpunctuation|geometric shapes|geometric_shapes|geometricshapes|georgian|georgian supplement|georgian_supplement|georgiansupplement|glagolitic|gothic|greek|greek and coptic|greek extended|greek_extended|greekandcoptic|greekextended|gujarati|gurmukhi|halfwidth and fullwidth 
forms|halfwidth_and_fullwidth_forms|halfwidthandfullwidthforms|hangul compatibility jamo|hangul jamo|hangul jamo extended-a|hangul jamo extended-b|hangul syllables|hangul_compatibility_jamo|hangul_jamo|hangul_jamo_extended_a|hangul_jamo_extended_b|hangul_syllables|hangulcompatibilityjamo|hanguljamo|hanguljamoextended-a|hanguljamoextended-b|hangulsyllables|hanunoo|hebrew|high private use surrogates|high surrogates|high_private_use_surrogates|high_surrogates|highprivateusesurrogates|highsurrogates|hiragana|ideographic description characters|ideographic_description_characters|ideographicdescriptioncharacters|imperial aramaic|imperial_aramaic|imperialaramaic|inscriptional pahlavi|inscriptional parthian|inscriptional_pahlavi|inscriptional_parthian|inscriptionalpahlavi|inscriptionalparthian|ipa extensions|ipa_extensions|ipaextensions|javanese|kaithi|kana supplement|kana_supplement|kanasupplement|kanbun|kangxi radicals|kangxi_radicals|kangxiradicals|kannada|katakana|katakana phonetic extensions|katakana_phonetic_extensions|katakanaphoneticextensions|kayah li|kayah_li|kayahli|kharoshthi|khmer|khmer symbols|khmer_symbols|khmersymbols|lao|latin extended additional|latin extended-a|latin extended-b|latin extended-c|latin extended-d|latin-1 supplement|latin-1supplement|latin_1_supplement|latin_extended_a|latin_extended_additional|latin_extended_b|latin_extended_c|latin_extended_d|latinextended-a|latinextended-b|latinextended-c|latinextended-d|latinextendedadditional|lepcha|letterlike symbols|letterlike_symbols|letterlikesymbols|limbu|linear b ideograms|linear b syllabary|linear_b_ideograms|linear_b_syllabary|linearbideograms|linearbsyllabary|lisu|low surrogates|low_surrogates|lowsurrogates|lycian|lydian|mahjong tiles|mahjong_tiles|mahjongtiles|malayalam|mandaic|mathematical alphanumeric symbols|mathematical operators|mathematical_alphanumeric_symbols|mathematical_operators|mathematicalalphanumericsymbols|mathematicaloperators|meetei mayek|meetei_mayek|meeteimayek|miscellaneous 
mathematical symbols-a|miscellaneous mathematical symbols-b|miscellaneous symbols|miscellaneous symbols and arrows|miscellaneous symbols and pictographs|miscellaneous technical|miscellaneous_mathematical_symbols_a|miscellaneous_mathematical_symbols_b|miscellaneous_symbols|miscellaneous_symbols_and_arrows|miscellaneous_symbols_and_pictographs|miscellaneous_technical|miscellaneousmathematicalsymbols-a|miscellaneousmathematicalsymbols-b|miscellaneoussymbols|miscellaneoussymbolsandarrows|miscellaneoussymbolsandpictographs|miscellaneoustechnical|modifier tone letters|modifier_tone_letters|modifiertoneletters|mongolian|musical symbols|musical_symbols|musicalsymbols|myanmar|myanmar extended-a|myanmar_extended_a|myanmarextended-a|new tai lue|new_tai_lue|newtailue|nko|number forms|number_forms|numberforms|ogham|ol chiki|ol_chiki|olchiki|old italic|old persian|old south arabian|old turkic|old_italic|old_persian|old_south_arabian|old_turkic|olditalic|oldpersian|oldsoutharabian|oldturkic|optical character recognition|optical_character_recognition|opticalcharacterrecognition|oriya|osmanya|phags-pa|phags_pa|phaistos disc|phaistos_disc|phaistosdisc|phoenician|phonetic extensions|phonetic extensions supplement|phonetic_extensions|phonetic_extensions_supplement|phoneticextensions|phoneticextensionssupplement|playing cards|playing_cards|playingcards|private use area|private_use_area|privateusearea|rejang|rumi numeral symbols|rumi_numeral_symbols|ruminumeralsymbols|runic|samaritan|saurashtra|shavian|sinhala|small form variants|small_form_variants|smallformvariants|spacing modifier letters|spacing_modifier_letters|spacingmodifierletters|specials|sundanese|superscripts and subscripts|superscripts_and_subscripts|superscriptsandsubscripts|supplemental arrows-a|supplemental arrows-b|supplemental mathematical operators|supplemental 
punctuation|supplemental_arrows_a|supplemental_arrows_b|supplemental_mathematical_operators|supplemental_punctuation|supplementalarrows-a|supplementalarrows-b|supplementalmathematicaloperators|supplementalpunctuation|supplementary private use area-a|supplementary private use area-b|supplementary_private_use_area_a|supplementary_private_use_area_b|supplementaryprivateusearea-a|supplementaryprivateusearea-b|surrogates_area|syloti nagri|syloti_nagri|sylotinagri|syriac|tagalog|tagbanwa|tags|tai le|tai tham|tai viet|tai xuan jing symbols|tai_le|tai_tham|tai_viet|tai_xuan_jing_symbols|taile|taitham|taiviet|taixuanjingsymbols|tamil|telugu|thaana|thai|tibetan|tifinagh|transport and map symbols|transport_and_map_symbols|transportandmapsymbols|ugaritic|unified canadian aboriginal syllabics|unified canadian aboriginal syllabics extended|unified_canadian_aboriginal_syllabics|unified_canadian_aboriginal_syllabics_extended|unifiedcanadianaboriginalsyllabics|unifiedcanadianaboriginalsyllabicsextended|vai|variation selectors|variation selectors supplement|variation_selectors|variation_selectors_supplement|variationselectors|variationselectorssupplement|vedic extensions|vedic_extensions|vedicextensions|vertical forms|vertical_forms|verticalforms|yi radicals|yi syllables|yi_radicals|yi_syllables|yijing hexagram symbols|yijing_hexagram_symbols|yijinghexagramsymbols|yiradicals|yisyllables)\}" contained display +syntax match clojureRegexpPosixCharClass "\v\\[pP]\{%(Blank|P%(unct|rint)|Digit|Graph|A%(l%(pha|num)|SCII)|XDigit|Space|Upper|Lower|Cntrl)\}" contained display +syntax match clojureRegexpJavaCharClass "\v\\[pP]\{java%(U%(pperCase|nicodeIdentifier%(Start|Part))|Mirrored|Alphabetic|SpaceChar|D%(efined|igit)|Whitespace|L%(etter%(OrDigit)?|owerCase)|TitleCase|I%(de%(ographic|ntifierIgnorable)|SOControl)|JavaIdentifier%(Start|Part))\}" contained display +syntax match clojureRegexpUnicodeCharClass 
"\v\\[pP]\{\cIs%(blank|hex%(digit|_digit)|uppercase|digit|control|a%(ssigned|l%(phabetic|num))|graph|ideographic|p%(rint|unctuation)|titlecase|w%(hite%(_space|space)|ord)|l%(owercase|etter)|noncharacter%(codepoint|_code_point))\}" contained display +syntax match clojureRegexpUnicodeCharClass "\v\\[pP][MLNPSZC]" contained display +syntax match clojureRegexpUnicodeCharClass "\v\\[pP]\{%(C[fonsc]?|L[DmloutC]?|S[kmoc]?|P[edfiosc]?|M[enc]?|Z[lps]?|N[dlo]?)\}" contained display +syntax match clojureRegexpUnicodeCharClass "\v\\[pP]\{%(Is|gc\=|general_category\=)?%(C[fonsc]?|L[DmloutC]?|S[kmoc]?|P[edfiosc]?|M[enc]?|Z[lps]?|N[dlo]?)\}" contained display +syntax match clojureRegexpUnicodeCharClass "\v\\[pP]\{\c%(Is|sc\=|script\=)%(h%(ira%(gana)?|an%([io]|g%(ul)?|unoo)?|ebr%(ew)?)|yi%(ii)?|java%(nese)?|c%(prt|y%(r%(illic|l)|priot)|h%(er%(okee)?|am)|uneiform|o%(mmon|pt%(ic)?)|a%(n%(adian_aboriginal|s)|ri%(an)?))|i%(mperial_aramaic|tal|n%(herited|scriptional_pa%(rthian|hlavi)))|a%(r%(ab%(ic)?|m%([in]|enian))|v%(st|estan))|g%(oth%(ic)?|u%(j%(arati|r)|r%(mukhi|u))|lag%(olitic)?|eor%(gian)?|re%(k|ek))|b%(u%(gi%(nese)?|h%(d|id))|ra%(i%(lle)?|h%(mi)?)|a%(mum?|t%(k|ak)|li%(nese)?)|opo%(mofo)?|eng%(ali)?)|s%(ha%(w|vian)|und%(anese)?|y%(r%(iac|c)|lo%(ti_nagri)?)|inh%(ala)?|a%(ur%(ashtra)?|rb|m%(r|aritan)))|d%(srt|e%(va%(nagari)?|seret))|l%(a%(na|oo?|t%(n|in))|epc%(ha)?|i%(n%(ear_b|b)|mbu?|su)|y%([dc]i%(an)?))|p%(h%(oenician|nx|li|ag%(s_pa)?)|rti)|e%(gyp%(tian_hieroglyphs)?|thi%(opic)?)|r%(jng|un%(ic|r)|ejang)|u%(nknown|gar%(itic)?)|vaii?|n%(koo?|ew_tai_lue)|m%(y%(mr|anmar)|tei|a%(nd%(aic)?|layalam)|ong%(olian)?|eetei_mayek|lym)|z%(inh|yyy|zzz)|t%(glg|fng|i%(finagh|b%(t|etan))|ha%(i|a%(na)?)|elu%(gu)?|a%(i_%(viet|le|tham)|l[eu]|g%(alog|b%(anwa)?)|vt|m%(l|il)))|x%(sux|peo)|o%(r%(iya|kh|ya)|sma%(nya)?|g%(am|ham)|l%(ck|d_%(south_arabian|turkic|italic|persian)|_chiki))|k%(a%(takana|yah_li|n%(nada|a)|li|ithi)|h%(m%(r|er)|ar%(oshthi)?)|thi|nda))\}" contained display +syntax match 
clojureRegexpUnicodeCharClass "\v\\[pP]\{\c%(In|blk\=|block\=)%(s%(a%(maritan|urashtra)|mall%(_form_variants| form variants|formvariants)|inhala|y%(loti%(nagri|[_ ]nagri)|riac)|havian|p%(ecials|acing%(modifierletters|_modifier_letters| modifier letters))|u%(p%(erscripts%(andsubscripts|_and_subscripts| and subscripts)|plementa%(ry%( private use area-[ab]|_private_use_area_[ab]|privateusearea-[ab])|l%( %(arrows-[ab]|punctuation|mathematical operators)|arrows-[ab]|_%(punctuation|mathematical_operators|arrows_[ab])|punctuation|mathematicaloperators)))|ndanese|rrogates_area))|l%(a%(o|tin%(_%(extended_%(a%(dditional)?|[dcb])|1_supplement)| extended%(-[dacb]| additional)|-1%( supplement|supplement)|extended%(additional|-[dacb])))|e%(pcha|tterlike%(symbols|[_ ]symbols))|y[cd]ian|i%(su|mbu|near%(b%(ideograms|syllabary)|_b_%(ideograms|syllabary)| b %(ideograms|syllabary)))|ow%([_ ]surrogates|surrogates))|b%(a%(sic%(latin|[_ ]latin)|tak|linese|mum%([_ ]supplement|supplement)?)|yzantine%(_musical_symbols| musical symbols|musicalsymbols)|engali|u%(ginese|hid)|lock%(elements|[_ ]elements)|ra%(hmi|ille%([ _]patterns|patterns))|o%(x%(drawing|[_ ]drawing)|pomofo%([ _]extended|extended)?))|t%(a%(g%(s|alog|banwa)|mil|i%( %(viet|xuan jing symbols|le|tham)|viet|le|_%(xuan_jing_symbols|viet|le|tham)|tham|xuanjingsymbols))|ha%(i|ana)|elugu|i%(finagh|betan)|ransport%(_and_map_symbols| and map symbols|andmapsymbols))|n%(ew%(_tai_lue| tai lue|tailue)|umber%([ _]forms|forms)|ko)|m%(iscellaneous%(_%(mathematical_symbols_[ab]|symbols%(_and_%(arrows|pictographs))?|technical)|mathematicalsymbols-[ab]|technical| %(symbols%( and %(arrows|pictographs))?|mathematical symbols-[ab]|technical)|symbols%(and%(arrows|pictographs))?)|eetei%(mayek|[_ ]mayek)|a%(ndaic|thematical%(alphanumericsymbols|operators|_%(alphanumeric_symbols|operators)| %(operators|alphanumeric symbols))|hjong%(tiles|[_ ]tiles)|layalam)|yanmar%(_extended_a|extended-a| extended-a)?|o%(difier%( tone 
letters|toneletters|_tone_letters)|ngolian)|usical%(symbols|[_ ]symbols))|p%(h%(a%(gs[-_]pa|istos%(disc|[_ ]disc))|o%(netic%(_extensions%(_supplement)?| extensions%( supplement)?|extensions%(supplement)?)|enician))|rivate%(usearea|_use_area| use area)|laying%(cards|[_ ]cards))|javanese|u%(garitic|nified%(_canadian_aboriginal_syllabics%(_extended)?| canadian aboriginal syllabics%( extended)?|canadianaboriginalsyllabics%(extended)?))|o%(riya|gham|l%([ _]chiki|d%(turkic|_%(south_arabian|turkic|italic|persian)|italic|persian| %(south arabian|turkic|italic|persian)|southarabian)|chiki)|smanya|ptical%(_character_recognition| character recognition|characterrecognition))|v%(e%(dic%(extensions|[_ ]extensions)|rtical%([ _]forms|forms))|a%(i|riation%(_selectors%(_supplement)?| selectors%( supplement)?|selectors%(supplement)?)))|a%(vestan|l%(phabetic%(_presentation_forms| presentation forms|presentationforms)|chemical%(symbols|[_ ]symbols))|ncient%( %(symbols|greek %(musical notation|numbers))|symbols|greek%(musicalnotation|numbers)|_%(symbols|greek_%(musical_notation|numbers)))|egean%([ _]numbers|numbers)|r%(menian|rows|abic%( %(presentation forms-[ab]|supplement)|_%(presentation_forms_[ab]|supplement)|presentationforms-[ab]|supplement)?))|i%(nscriptional%(pa%(rthian|hlavi)|%([_ ]pa%(rthian|hlavi)))|deographic%( description characters|descriptioncharacters|_description_characters)|pa%(extensions|[_ ]extensions)|mperial%(aramaic|[_ ]aramaic))|yi%(%([_ ]%(radicals|syllables))|radicals|syllables|jing%(hexagramsymbols|_hexagram_symbols| hexagram symbols))|k%(a%(yah%(li|[_ ]li)|n%(nada|bun|a%([_ ]supplement|supplement)|gxi%(radicals|[_ ]radicals))|ithi|takana%(phoneticextensions|_phonetic_extensions| phonetic extensions)?)|h%(aroshthi|mer%(symbols|[_ ]symbols)?))|d%(e%(vanagari%([ _]extended|extended)?|seret)|omino%(tiles|[_ ]tiles)|ingbats)|g%(lagolitic|othic|reek%(andcoptic| %(and coptic|extended)|extended|_extended)?|u%(rmukhi|jarati)|e%(o%(metric%([_ ]shapes|shapes)|rgian%([_ 
]supplement|supplement)?)|neral%(punctuation|[_ ]punctuation)))|e%(nclosed%( %(ideographic supplement|cjk letters and months|alphanumeric%( supplement|s))|cjklettersandmonths|_%(ideographic_supplement|alphanumeric%(_supplement|s)|cjk_letters_and_months)|alphanumerics%(upplement)?|ideographicsupplement)|moticons|thiopic%(extended%(-a)?| %(extended%(-a)?|supplement)|_%(extended%(_a)?|supplement)|supplement)?|gyptian%(hieroglyphs|[_ ]hieroglyphs))|r%(ejang|u%(nic|mi%(numeralsymbols|_numeral_symbols| numeral symbols)))|c%(jk%(_%(compatibility%(_%(ideographs%(_supplement)?|forms))?|radicals_supplement|unified_ideographs%(_extension_[dacb])?|s%(trokes|ymbols_and_punctuation))|compatibility%(ideographs%(supplement)?|forms)?|unifiedideographs%(extension[dacb])?|radicalssupplement|s%(ymbolsandpunctuation|trokes)| %(unified ideographs%( extension [dacb])?|s%(trokes|ymbols and punctuation)|radicals supplement|compatibility%( %(forms|ideographs%( supplement)?))?))|h%(am|erokee)|u%(neiform%(_numbers_and_punctuation| numbers and punctuation|numbersandpunctuation)?|rrency%(symbols|[_ ]symbols))|y%(rillic%(_%(extended_[ab]|supplementary)| %(extended-[ab]|supplement%(ary)?)|extended-[ab]|supplement%(ary)?)?|priot%([_ ]syllabary|syllabary))|o%(ntrol%(pictures|[_ ]pictures)|unting%(rodnumerals|_rod_numerals| rod numerals)|m%(bining%(halfmarks|diacriticalmarks%(forsymbols|supplement)?| %(marks for symbols|half marks|diacritical marks%( %(for symbols|supplement))?)|marksforsymbols|_%(marks_for_symbols|half_marks|diacritical_marks%(_supplement)?))|mon%( indic number forms|indicnumberforms|_indic_number_forms))|ptic)|arian)|h%(i%(ragana|gh%(_%(private_use_surrogates|surrogates)| %(private use surrogates|surrogates)|surrogates|privateusesurrogates))|ebrew|a%(n%(unoo|gul%(jamo%(extended-[ab])?| %(jamo%( extended-[ab])?|syllables|compatibility jamo)|_%(syllables|jamo%(_extended_[ab])?|compatibility_jamo)|syllables|compatibilityjamo))|lfwidth%( and fullwidth 
forms|andfullwidthforms|_and_fullwidth_forms))))\}" contained display syntax match clojureRegexpPredefinedCharClass "\v%(\\[dDsSwW]|\.)" contained display syntax cluster clojureRegexpCharPropertyClasses contains=clojureRegexpPosixCharClass,clojureRegexpJavaCharClass,clojureRegexpUnicodeCharClass