Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

First draft of Kwim grammar in Pegex

Only top level blocks have been done so far.

Need to test this then subparse phrases.
  • Loading branch information...
commit e0ffd757a0376755ab0907881f2fccbf9c8e144f 1 parent 673e342
@ingydotnet authored
Showing with 301 additions and 0 deletions.
  1. +12 −0 Makefile
  2. +148 −0 kwim.pgx
  3. +91 −0 kwim.pgx.json
  4. +50 −0 kwim.pgx.yaml
View
12 Makefile
@@ -0,0 +1,12 @@
+# Compile every Pegex grammar (*.pgx) in this directory to YAML and JSON.
+# Use `make STYLE=BOOT` to compile with `pegex compile --boot`.
+
+# Expand the glob immediately.  A bare `*.pgx` inside a variable is not
+# expanded, which yields the literal targets `*.pgx.yaml` / `*.pgx.json`
+# whenever the generated files do not exist yet.
+GRAMMAR := $(wildcard *.pgx)
+export PERL5LIB=../pegex-pm/lib:../pegex-cmd-pm/lib
+STYLE ?= NORMAL
+NORMAL_COMMAND = pegex compile --to=
+BOOT_COMMAND = pegex compile --boot --to=
+# Selects NORMAL_COMMAND or BOOT_COMMAND depending on STYLE.
+COMMAND = ${${STYLE}_COMMAND}
+# One output of each kind per grammar (suffix added to every word).
+YAML := $(addsuffix .yaml,$(GRAMMAR))
+JSON := $(addsuffix .json,$(GRAMMAR))
+ALL = $(YAML) $(JSON)
+
+all: $(ALL)
+
+.PHONY: all
+# Remove a half-written output if pegex fails, so it is not seen as current.
+.DELETE_ON_ERROR:
+
+# Static pattern rules: each output depends on its own grammar (the stem),
+# and on this Makefile so that changing the command forces a rebuild.
+$(YAML): %.yaml: % Makefile
+	$(COMMAND)yaml $< > $@
+
+$(JSON): %.json: % Makefile
+	$(COMMAND)json $< > $@
View
148 kwim.pgx
@@ -0,0 +1,148 @@
+%grammar kwim
+%version 0.0.1
+
+# A document is a set of top level blocks of lines (think html DIVs)
+document: top_level_block*
+
+# These are the top level blocks
+# `other_line` is listed last; it matches any line that has at least one
+# character, so it acts as the fallback when no other block matches.
+# NOTE(review): presumably the alternatives are tried in the order listed
+# (ordered choice) -- confirm against the Pegex documentation.
+top_level_block:
+ comment_line |
+ foreign_block |
+ plugin_block |
+ header |
+ unordered_list |
+ ordered_list |
+ table |
+ quote_block |
+ horizontal_rule |
+ preformatted |
+ blank_line |
+ paragraph |
+ other_line
+
+### Helper rules
+# A single whitespace character: a space or a tab.
+ws: / [<SPACE><TAB>] / # For ~ and ~~
+
+# Plugin function names
+# One or more word characters or dashes.
+identifier: / [<WORD><DASH>]+ /
+
+# An alias for readability
+# The non-capturing group is required: rule references are expanded inline,
+# so without it the `?` in `<blank>?` binds only to the final newline of the
+# expansion.  The optional whitespace then still matches and eats the leading
+# indentation of the following line instead of an entire blank line.
+blank: / (: <blank_line> ) /
+
+# Line containing at least one non-blank
+# Shape: any characters, one non-whitespace character, any characters, EOL.
+non_blank_line: / <ANY>*? <NS> <ANY>* <EOL> /
+
+### Top level blocks start here:
+
+# A throwaway comment line
+# A hash followed by the rest of the line.  Nothing is captured.
+# `<blank>?` is meant to also consume one blank line after the comment.
+comment_line: /
+ <HASH> <ANY>* <EOL>
+ <blank>?
+/
+
+# This is a foreign markup inside a Kwim document
+# Opens with a line holding a dot and an identifier, and closes with a line
+# holding a dot and the same identifier (matched by backreference).
+# Captures: 1 = the identifier, 2 = the enclosed lines.  The enclosed lines
+# are matched non-greedily, so the first matching closing line ends the block.
+foreign_block: /
+ <DOT> (<identifier>) ~ <EOL>
+ (
+ (:<ANY>*<EOL>)*?
+ )
+ # XXX \1 should be <BACKREF1>. Need to add to Pegex.
+ <DOT> \1 ~ <EOL>
+ <blank>?
+/
+
+# A plugin call: curly braces on a line of their own.  The text between the
+# braces is captured.  Whitespace after the closing brace is optional (`~`),
+# matching the plugin alternative of the `starter` rule; requiring it (`~~`)
+# rejects a plugin line that ends directly after the closing brace.
+plugin_block: /
+ <LCURLY> (<ANY>*) <RCURLY> ~ <EOL>
+ <blank>?
+/
+
+# A header line: one to six equals signs, at least one space or tab, then
+# the header text.  Captures: 1 = the equals signs, 2 = the text.
+header: /
+ (<EQUAL>{1,6}) ~~ (<ANY>+) <EOL>
+ <blank>?
+/
+
+# An unordered list.  The first line must start with one or more stars and a
+# space.  Following lines may start with stars or dashes, so only the first
+# line decides that this is an unordered list.  All lines form one capture.
+unordered_list: /
+ (
+ <STAR>+ <SPACE> <ANY>* <EOL>
+ (:
+ [<STAR><DASH>]+ <SPACE> <ANY>* <EOL>
+ )*
+ )
+ <blank>?
+/
+
+# An ordered list.  The first line must start with one or more dashes and a
+# space.  Following lines may start with stars or dashes, so only the first
+# line decides that this is an ordered list.  All lines form one capture.
+ordered_list: /
+ (
+ <DASH>+ <SPACE> <ANY>* <EOL>
+ (:
+ [<STAR><DASH>]+ <SPACE> <ANY>* <EOL>
+ )*
+ )
+ <blank>?
+/
+
+# A table: one or more rows, captured together as a single group.
+# A row is either
+#   - a line ending in pipe, space, EOL that is directly followed by another
+#     pipe (checked with a lookahead, so that pipe is not consumed), or
+#   - text from a pipe up to the nearest pipe directly followed by EOL.
+#     <ALL> also matches newlines, so this form may span several lines.
+table: /
+ (
+ (:
+ <PIPE><ANY>*<PIPE><SPACE><EOL>(=<PIPE>)
+ |
+ <PIPE><ALL>*?<PIPE><EOL>
+ )+
+ )
+ <blank>?
+/
+
+# A block quote: one or more consecutive lines that each start with a right
+# angle bracket, captured together as a single group.
+# The end of line must be written as an atom in angle brackets; a bare EOL
+# is compiled as the three literal letters and never matches a real line end.
+quote_block: /
+ (
+ (: <RANGLE> <ANY>* <EOL>)+
+ )
+ <blank>?
+/
+
+# A horizontal rule: three or more dashes followed directly by EOL.
+horizontal_rule: / <DASH>{3,} <EOL> /
+
+# A preformatted block: one or more lines indented by four spaces.  Blank
+# lines may appear between preformatted lines but the capture always ends on
+# a preformatted line.
+# The multi-line alternative must come first.  Regex alternation is ordered,
+# so with the single-line alternative in front it always wins and every
+# preformatted line becomes a block of its own.
+preformatted: /
+ (
+ # Multiple lines
+ (:
+ <preformatted_line>
+ (:
+ <blank_line> |
+ <preformatted_line>
+ )*
+ <preformatted_line>
+ ) |
+ # Or a single line
+ <preformatted_line>
+ )
+ # All so we can avoid capturing final blank line
+ <blank>?
+/
+
+# One line of a preformatted block: exactly four spaces followed by a
+# non-blank line.
+preformatted_line: /
+ <SPACE>{4} <non_blank_line>
+/
+
+# A blank line: optional spaces and tabs, then EOL.
+blank_line: / ~ <EOL> /
+
+# A paragraph: one or more consecutive non-blank lines, captured as a single
+# group.  Each line is first checked with a negative lookahead so that a line
+# beginning with a `starter` ends the paragraph.
+paragraph: /
+ (
+ (:
+ (!<starter>) # not another block starter
+ <non_blank_line>
+ )+
+ )
+ <blank>?
+/
+
+# Something that shouldn't start a paragraph line because it starts something
+# else
+# Alternatives, in order: a run of hash, dash, star or equals characters
+# followed by a space; a pipe or right angle bracket; a dot and an
+# identifier; a complete plugin line in curly braces.
+# Every atom in the character class needs both angle brackets, otherwise its
+# name is compiled as literal letters.  The plugin body takes a `*` so that
+# bodies of any length are recognised, not just a single character.
+starter: /(:
+ [<HASH><DASH><STAR><EQUAL>]+ <SPACE> |
+ [<PIPE><RANGLE>] |
+ <DOT> <identifier> |
+ <LCURLY> [^<RCURLY>]* <RCURLY> ~ <EOL>
+)/
+
+# Fallback: any line with at least one character.  The text is captured
+# without its EOL.
+other_line: / (<ANY>+) <EOL> /
+
View
91 kwim.pgx.json
@@ -0,0 +1,91 @@
+{
+ "+grammar" : "kwim",
+ "+toprule" : "document",
+ "+version" : "0.0.1",
+ "blank_line" : {
+ ".rgx" : "[ \\t]*\\r?\\n"
+ },
+ "comment_line" : {
+ ".rgx" : "\\#.*\\r?\\n[ \\t]*\\r?\\n?"
+ },
+ "document" : {
+ "+min" : 0,
+ ".ref" : "top_level_block"
+ },
+ "foreign_block" : {
+ ".rgx" : "\\.([\\w\\-]+)[ \\t]*\\r?\\n((?:.*\\r?\\n)*?)\\.\\1[ \\t]*\\r?\\n[ \\t]*\\r?\\n?"
+ },
+ "header" : {
+ ".rgx" : "(={1,6})[ \\t]+(.+)\\r?\\n[ \\t]*\\r?\\n?"
+ },
+ "horizontal_rule" : {
+ ".rgx" : "\\-{3,}\\r?\\n"
+ },
+ "ordered_list" : {
+ ".rgx" : "(\\-+ .*\\r?\\n(?:[\\*\\-]+ .*\\r?\\n)*)[ \\t]*\\r?\\n?"
+ },
+ "other_line" : {
+ ".rgx" : "(.+)\\r?\\n"
+ },
+ "paragraph" : {
+ ".rgx" : "((?:(?!(?:[\\#\\-STAR>=]+ |[\\|>]|\\.[\\w\\-]+|\\{[^\\}]\\}[ \\t]*\\r?\\n)).*?\\S.*\\r?\\n)+)[ \\t]*\\r?\\n?"
+ },
+ "plugin_block" : {
+ ".rgx" : "\\{(.*)\\}[ \\t]+\\r?\\n[ \\t]*\\r?\\n?"
+ },
+ "preformatted" : {
+ ".rgx" : "( {4}.*?\\S.*\\r?\\n|(?: {4}.*?\\S.*\\r?\\n(?:[ \\t]*\\r?\\n| {4}.*?\\S.*\\r?\\n)* {4}.*?\\S.*\\r?\\n))[ \\t]*\\r?\\n?"
+ },
+ "quote_block" : {
+ ".rgx" : "((?:>.*EOL)+)[ \\t]*\\r?\\n?"
+ },
+ "table" : {
+ ".rgx" : "((?:\\|.*\\| \\r?\\n(?=\\|)|\\|[\\s\\S]*?\\|\\r?\\n)+)[ \\t]*\\r?\\n?"
+ },
+ "top_level_block" : {
+ ".any" : [
+ {
+ ".ref" : "comment_line"
+ },
+ {
+ ".ref" : "foreign_block"
+ },
+ {
+ ".ref" : "plugin_block"
+ },
+ {
+ ".ref" : "header"
+ },
+ {
+ ".ref" : "unordered_list"
+ },
+ {
+ ".ref" : "ordered_list"
+ },
+ {
+ ".ref" : "table"
+ },
+ {
+ ".ref" : "quote_block"
+ },
+ {
+ ".ref" : "horizontal_rule"
+ },
+ {
+ ".ref" : "preformatted"
+ },
+ {
+ ".ref" : "blank_line"
+ },
+ {
+ ".ref" : "paragraph"
+ },
+ {
+ ".ref" : "other_line"
+ }
+ ]
+ },
+ "unordered_list" : {
+ ".rgx" : "(\\*+ .*\\r?\\n(?:[\\*\\-]+ .*\\r?\\n)*)[ \\t]*\\r?\\n?"
+ }
+}
View
50 kwim.pgx.yaml
@@ -0,0 +1,50 @@
+---
++grammar: kwim
++toprule: document
++version: 0.0.1
+blank_line:
+ .rgx: '[ \t]*\r?\n'
+comment_line:
+ .rgx: \#.*\r?\n[ \t]*\r?\n?
+document:
+ +min: 0
+ .ref: top_level_block
+foreign_block:
+ .rgx: \.([\w\-]+)[ \t]*\r?\n((?:.*\r?\n)*?)\.\1[ \t]*\r?\n[ \t]*\r?\n?
+header:
+ .rgx: (={1,6})[ \t]+(.+)\r?\n[ \t]*\r?\n?
+horizontal_rule:
+ .rgx: \-{3,}\r?\n
+ordered_list:
+ .rgx: (\-+ .*\r?\n(?:[\*\-]+ .*\r?\n)*)[ \t]*\r?\n?
+other_line:
+ .rgx: (.+)\r?\n
+paragraph:
+ .rgx: ((?:(?!(?:[\#\-STAR>=]+ |[\|>]|\.[\w\-]+|\{[^\}]\}[ \t]*\r?\n)).*?\S.*\r?\n)+)[
+ \t]*\r?\n?
+plugin_block:
+ .rgx: \{(.*)\}[ \t]+\r?\n[ \t]*\r?\n?
+preformatted:
+ .rgx: '( {4}.*?\S.*\r?\n|(?: {4}.*?\S.*\r?\n(?:[ \t]*\r?\n| {4}.*?\S.*\r?\n)* {4}.*?\S.*\r?\n))[
+ \t]*\r?\n?'
+quote_block:
+ .rgx: ((?:>.*EOL)+)[ \t]*\r?\n?
+table:
+ .rgx: ((?:\|.*\| \r?\n(?=\|)|\|[\s\S]*?\|\r?\n)+)[ \t]*\r?\n?
+top_level_block:
+ .any:
+ - .ref: comment_line
+ - .ref: foreign_block
+ - .ref: plugin_block
+ - .ref: header
+ - .ref: unordered_list
+ - .ref: ordered_list
+ - .ref: table
+ - .ref: quote_block
+ - .ref: horizontal_rule
+ - .ref: preformatted
+ - .ref: blank_line
+ - .ref: paragraph
+ - .ref: other_line
+unordered_list:
+ .rgx: (\*+ .*\r?\n(?:[\*\-]+ .*\r?\n)*)[ \t]*\r?\n?
Please sign in to comment.
Something went wrong with that request. Please try again.