Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion clj/project.clj
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
:license {:name "Vim License"
:url "http://vimdoc.sourceforge.net/htmldoc/uganda.html#license"
:comments ":help license"}
:dependencies [[org.clojure/clojure "1.5.1"]])
:dependencies [[org.clojure/clojure "1.5.1"]
[frak "0.1.3"]])
70 changes: 52 additions & 18 deletions clj/src/vim_clojure_static/generate.clj
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,34 @@

(ns vim-clojure-static.generate
(:require [clojure.string :as string]
[clojure.set :as set]))
[clojure.set :as set]
[frak]))

;;
;; Helpers
;;

(defn vim-frak-pattern
  "Build one regular expression that matches every string in strs (using
  frak) and return it as a string in which each Java non-capturing group
  opener `(?:` has been rewritten to Vim's `%(` form."
  [strs]
  (let [java-pattern (str (frak/pattern strs))]
    ;; Plain string-for-string substitution, so no regex escaping is needed.
    (string/replace java-pattern "(?:" "%(")))

(defn property-pattern
"Vimscript very magic pattern for a character property class."
([s] (property-pattern s true))
([s braces?] (if braces?
(format "\\v\\\\[pP]\\{%s\\}" s)
(format "\\v\\\\[pP]%s" s))))
([s braces?]
(if braces?
(format "\\v\\\\[pP]\\{%s\\}" s)
(format "\\v\\\\[pP]%s" s))))

(defn syntax-match-properties
"Vimscript literal `syntax match` for a character property class."
([group fmt props] (syntax-match-properties group fmt props true))
([group fmt props braces?]
(format "syntax match %s \"%s\" contained display\n"
(name group)
(property-pattern (format fmt (string/join \| (sort props))) braces?))))
(property-pattern (format fmt (vim-frak-pattern props)) braces?))))

(defn get-private-field
"Violate encapsulation and get the value of a private field."
Expand Down Expand Up @@ -141,15 +149,15 @@
;; `IsPosix` works, but is undefined.
(syntax-match-properties
:clojureRegexpPosixCharClass
"%%(%s)"
"%s"
(:posix character-properties)))

(def vim-java-char-classes
"Vimscript literal `syntax match` for \\p{javaMethod} property classes."
;; `IsjavaMethod` works, but is undefined.
(syntax-match-properties
:clojureRegexpJavaCharClass
"java%%(%s)"
"java%s"
(map #(string/replace % #"\Ajava" "") (:java character-properties))))

(def vim-unicode-binary-char-classes
Expand All @@ -158,26 +166,32 @@
;; insensitively like the other Unicode properties.
(syntax-match-properties
:clojureRegexpUnicodeCharClass
"\\cIs%%(%s)"
"\\cIs%s"
(map string/lower-case (:binary character-properties))))

(def vim-unicode-category-char-classes
"Vimscript literal `syntax match` for Unicode General Category classes."
(let [cats (map seq (:category character-properties))
cats (map (fn [[c subcats]]
(format "%s[%s]" c (apply str (sort (mapcat rest subcats)))))
(group-by first cats))]
(let [cats (sort (:category character-properties))
chrs (->> (map seq cats)
(group-by first)
(keys)
(map str)
(sort))]
;; gc= and general_category= can be case insensitive, but this behavior is
;; undefined.
(str
(syntax-match-properties
:clojureRegexpUnicodeCharClass
"%%(%s)"
(sort (filter #(= (count %) 1) (:category character-properties)))
"%s"
chrs
false)
(syntax-match-properties
:clojureRegexpUnicodeCharClass
"%%(Is|gc\\=|general_category\\=)?%%(%s)"
"%s"
cats)
(syntax-match-properties
:clojureRegexpUnicodeCharClass
"%%(Is|gc\\=|general_category\\=)?%s"
cats))))

(def vim-unicode-script-char-classes
Expand All @@ -189,7 +203,7 @@
;; InScriptName works, but is undefined.
(syntax-match-properties
:clojureRegexpUnicodeCharClass
"\\c%%(Is|sc\\=|script\\=)%%(%s)"
"\\c%%(Is|sc\\=|script\\=)%s"
(map string/lower-case (:script character-properties))))

(def vim-unicode-block-char-classes
Expand All @@ -198,10 +212,26 @@
;; of Is.
(syntax-match-properties
:clojureRegexpUnicodeCharClass
"\\c%%(In|blk\\=|block\\=)%%(%s)"
"\\c%%(In|blk\\=|block\\=)%s"
(map string/lower-case (:block character-properties))))

(def comprehensive-clojure-character-property-regexps
  "Newline-separated Clojure regexp literals, one per property family
  (posix, java, binary, category, script, block). Each literal concatenates
  a \\p{...} class for every property of that family, in sorted order.
  For testing Vimscript syntax matching optimizations."
  (letfn [(regexp-literal [[prefix prop-key]]
            ;; One #"..." literal holding \p{<prefix><prop>} for each property
            (->> (get character-properties prop-key)
                 sort
                 (map #(format "\\p{%s%s}" prefix %))
                 string/join
                 (format "#\"%s\"")))]
    (->> [["" :posix]
          ["" :java]
          ["Is" :binary]
          ["general_category=" :category]
          ["script=" :script]
          ["block=" :block]]
         (map regexp-literal)
         (string/join \newline))))

(comment
;; Generate the vim literal definitions for pasting into the runtime files.
(spit "tmp/clojure-defs.vim"
(str generation-comment
clojure-version-comment
Expand All @@ -218,4 +248,8 @@
vim-unicode-binary-char-classes
vim-unicode-category-char-classes
vim-unicode-script-char-classes
vim-unicode-block-char-classes)))
vim-unicode-block-char-classes))
;; Generate an example file with all possible character property literals.
(spit "tmp/all-char-props.clj"
comprehensive-clojure-character-property-regexps))

46 changes: 45 additions & 1 deletion clj/src/vim_clojure_static/test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
[file & lines]
(io/make-parents file)
(spit file (string/join \newline lines))
(shell/sh "vim" "-u" "NONE" "-N" "-S" "vim/syn-id-names.vim" file)
(shell/sh "vim" "-u" "NONE" "-N" "-S" "vim/test-runtime.vim" file)
;; The last line of the file will contain valid EDN
(into {} (map (fn [l ids] [l (mapv keyword ids)])
lines
Expand Down Expand Up @@ -69,6 +69,42 @@
ss λs)))
contexts)))))

(defn vim-nfa-dump
  "Run a patched version of Vim compiled with -DDEBUG on a new file containing
  buffer, then move the NFA log to log-path. The patch is located at
  vim/custom-nfa-log.patch

  vim-path is the Vim executable to invoke. Missing parent directories of the
  scratch file and of log-path are created first. Prints the elapsed time of
  the Vim run and returns the result of the final `mv` shell call."
  [vim-path buffer log-path]
  (let [file "tmp/nfa-test-file.clj"]
    ;; Without these, spit and mv fail when tmp/ (or the log's directory)
    ;; does not exist yet, e.g. on a fresh checkout.
    (io/make-parents file)
    (io/make-parents log-path)
    (spit file buffer)
    (time (shell/sh vim-path "-u" "NONE" "-N" "-S" "vim/test-runtime.vim" file))
    ;; The patched Vim writes nfa_regexp.log into the working directory.
    (shell/sh "mv" "nfa_regexp.log" log-path)))

(defn compare-nfa-dumps
  "Run vim-nfa-dump on buf once per file in syntax-files, temporarily
  installing each file's contents as ../syntax/clojure.vim. Each run's log
  is written to tmp/debug:<hash of buf>:<hash of syntax script>.log. The
  original contents of the syntax file are written back afterwards, even
  when a run throws.

  Options are keyword arguments: :vim-path is the Vim executable handed to
  vim-nfa-dump; it defaults to the VIMDEBUG environment variable, or to
  \"vim\" when that is unset.

  Returns a vector holding the newline count of each log file, in the same
  order as syntax-files."
  [buf [& syntax-files] & opts]
  (let [{:keys [vim-path]
         :or {vim-path (or (System/getenv "VIMDEBUG") "vim")}} opts
        syn-path "../syntax/clojure.vim"
        orig-syn (slurp syn-path)
        buf-hash (hash buf)
        dump-line-count
        (fn [candidate]
          (let [candidate-src (slurp candidate)
                log-path (format "tmp/debug:%d:%d.log"
                                 buf-hash
                                 (hash candidate-src))]
            ;; Install the candidate syntax script, then log a Vim run with it
            (spit syn-path candidate-src)
            (vim-nfa-dump vim-path buf log-path)
            (->> (slurp log-path)
                 (re-seq #"\n")
                 count)))]
    (try
      (mapv dump-line-count syntax-files)
      (finally
        ;; Always restore the real syntax file
        (spit syn-path orig-syn)))))

(comment

(macroexpand-1
Expand All @@ -80,4 +116,12 @@
["^" #(= % [:clojureRegexpBoundary])]]))
(test #'number-literals-test)

(defn dump!
  "Compare NFA log sizes for a one-line buffer holding the regexp literal
  #\"\\p{<buf>}\", using the current syntax file and tmp/altsyntax.vim."
  [buf]
  (let [regexp-literal (format "#\"\\p{%s}\"\n" buf)
        syntax-files ["../syntax/clojure.vim" "tmp/altsyntax.vim"]]
    (compare-nfa-dumps regexp-literal syntax-files)))

(dump! "Ll")
(dump! "javaLowercase")
(dump! "block=UNIFIED CANADIAN ABORIGINAL SYLLABICS")

)
31 changes: 29 additions & 2 deletions clj/test/vim_clojure_static/syntax_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,28 @@

(comment (test #'number-literals-test))

;; TODO: Finish me! (this was in an old git stash)
;; (defsyntaxtest keywords-test
;; (with-format "%s"
;; ":1" kw
;; ":A" kw
;; ":a" kw
;; ":αβγ" kw
;; "::a" kw
;; ":a/b" kw
;; ":a:b" kw
;; ":a:b/:c:b" kw
;; ":a/b/c/d" kw
;; "::a/b" !kw
;; "::" !kw
;; ":a:" !kw
;; ":a/" !kw
;; ":/" !kw
;; ":" !kw
;; ))
;;
;; (comment (test #'keywords-test))

(defsyntaxtest java-regexp-literals-test
["#\"%s\""
[;; http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html
Expand Down Expand Up @@ -221,8 +243,6 @@
"\\p{IsLatin}" regexp-unicode-char-class
;; \p{InGreek} A character in the Greek block (block)
"\\p{InGreek}" regexp-unicode-char-class
;; \p{Lu} An uppercase letter (category)
"\\p{Lu}" regexp-unicode-char-class
;; \p{IsAlphabetic} An alphabetic character (binary property)
"\\p{IsAlphabetic}" regexp-unicode-char-class
;; \p{Sc} A currency symbol
Expand All @@ -231,6 +251,13 @@
"\\P{InGreek}" regexp-unicode-char-class
;; [\p{L}&&[^\p{Lu}]] Any letter except an uppercase letter (subtraction)

;; Abbreviated categories
"\\pL" regexp-unicode-char-class
"\\p{L}" regexp-unicode-char-class
"\\p{Lu}" regexp-unicode-char-class
"\\p{gc=L}" regexp-unicode-char-class
"\\p{IsLu}" regexp-unicode-char-class

;;;; Invalid classes

"\\P{Xzibit}" !regexp-posix-char-class
Expand Down
75 changes: 75 additions & 0 deletions clj/vim/custom-nfa-log.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
commit 09b1321fd7fa91e5a8e36ebc2d858079969adca4 (HEAD, github/custom-nfa-log, custom-nfa-log)
Author: guns <self@sungpae.com>
Date: Thu, 1 Aug 2013 10:56:19 -0500

Add custom NFA logging to nfa_regexp.log

The goal is to get a ballpark estimate of the number of steps the regexp
engine undertakes in a session. One line of logging is done:

* Start of nfa_regmatch()
* For each character considered
* For each state of a character considered

Vim should be compiled with -DDEBUG; if the NFA engine state graphs are
desired, then also use -DINCLUDE_NFA_DUMP.

This branch can also be found at:

https://github.com/guns/vim/tree/custom-nfa-log
---
src/regexp_nfa.c | 20 +++++++++++++++++---
1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c
index 738ac3b..bea9d50 100644
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -24,9 +24,18 @@
#ifdef DEBUG
# define NFA_REGEXP_ERROR_LOG "nfa_regexp_error.log"
# define ENABLE_LOG
-# define NFA_REGEXP_DUMP_LOG "nfa_regexp_dump.log"
-# define NFA_REGEXP_RUN_LOG "nfa_regexp_run.log"
-# define NFA_REGEXP_DEBUG_LOG "nfa_regexp_debug.log"
+# ifdef INCLUDE_NFA_DUMP
+# define NFA_REGEXP_DUMP_LOG "nfa_regexp.log"
+# else
+# define NFA_REGEXP_DUMP_LOG "/dev/null"
+# endif
+# define NFA_REGEXP_RUN_LOG "/dev/null"
+# define NFA_REGEXP_DEBUG_LOG "/dev/null"
+# define LOG(fmt, ...) do { \
+ FILE *log = fopen("nfa_regexp.log", "a"); \
+ fprintf(log, fmt, __VA_ARGS__); \
+ fclose(log); \
+} while (0)
#endif

enum
@@ -5043,6 +5052,8 @@ nfa_regmatch(prog, start, submatch, m)
goto theend;

#ifdef ENABLE_LOG
+ LOG("START nfa_regmatch: alloc=%d pattern=\"%s\"\n", size*2, prog->pattern);
+
log_fd = fopen(NFA_REGEXP_RUN_LOG, "a");
if (log_fd != NULL)
{
@@ -5129,6 +5140,8 @@ nfa_regmatch(prog, start, submatch, m)
nextlist->id = nfa_listid + 1;

#ifdef ENABLE_LOG
+ LOG("# states=%d reginput=\"%s\"\n", thislist->n, reginput);
+
fprintf(log_fd, "------------------------------------------\n");
fprintf(log_fd, ">>> Reginput is \"%s\"\n", reginput);
fprintf(log_fd, ">>> Advanced one character ... Current char is %c (code %d) \n", curc, (int)curc);
@@ -5161,6 +5174,7 @@ nfa_regmatch(prog, start, submatch, m)
fprintf(debug, "%s, ", code);
#endif
#ifdef ENABLE_LOG
+ LOG("## computing nextlist: code=\"%s\"\n", code);
{
int col;

15 changes: 11 additions & 4 deletions clj/vim/syn-id-names.vim → clj/vim/test-runtime.vim
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,23 @@
execute 'set rtp=' . expand('%:p:h:h:h') . ',$VIMRUNTIME'
filetype plugin on
syntax on
set synmaxcol=0
setfiletype clojure

function! s:append_syn_id_names()
if !exists('g:testing')
let g:testing = 1
endif

function! s:syn_id_names()
let names = []
for lnum in range(1, line('$'))
let f = 'synIDattr(synID(' . lnum . ', v:val, 0), "name")'
call add(names, map(range(1, virtcol([lnum, '$']) - 1), f))
endfor
" Changing the quotes will make this valid EDN
call append(line('$'), tr(string(names), "'", '"'))
return names
endfunction

call s:append_syn_id_names() | write | quitall!
if g:testing
" Changing the quotes will make this valid EDN
call append(line('$'), tr(string(s:syn_id_names()), "'", '"')) | write | quitall!
endif
5 changes: 5 additions & 0 deletions indent/clojure.vim
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
" License: Same as Vim
" Last Change: 30 January 2013

" TODO: Indenting after multibyte characters is broken:
" (let [Δ (if foo
" bar ; Indent error
" baz)])

if exists("b:did_indent")
finish
endif
Expand Down
Loading