diff --git a/HOWTO.rst b/HOWTO.rst index 7885885..b93212b 100644 --- a/HOWTO.rst +++ b/HOWTO.rst @@ -68,6 +68,8 @@ Defining Rules - ``star(zero, or_more)`` - ``plus(one, or_more)`` - ``_or(one, of, these)`` + - ``lookahead(this, stuff)`` # checks the content, but without consuming its tokens. + - ``notahead(this, stuff)`` # checks the content without consuming its tokens, and succeeds only if the content *does not* match. - ``_not(this, stuff)`` # checks the content, and if it *does not* match, consumes **a single** token. - ``TOKEN_NAME`` diff --git a/codetalker/c/parser.c b/codetalker/c/parser.c index 01378b4..5216e34 100644 --- a/codetalker/c/parser.c +++ b/codetalker/c/parser.c @@ -471,6 +471,38 @@ struct cParseNode* check_special(unsigned int rule, struct RuleSpecial special, _kill_ptree(current); DEDENT(); return NULL; + } else if (special.type == LOOKAHEAD) { + LOG("LOOKAHEAD\n"); + at = tokens->at; + tmp = parse_children(rule, special.option, grammar, tokens, error); + if (tmp == NULL) { + LOG("No match.\n"); + _kill_ptree(current); + tokens->at = at; + DEDENT(); + return NULL; + } + LOG("Matched.\n"); + _kill_ptree(tmp); + tokens->at = at; + DEDENT(); + return current; + } else if (special.type == NOTAHEAD) { + LOG("NOTAHEAD\n"); + at = tokens->at; + tmp = parse_children(rule, special.option, grammar, tokens, error); + if (tmp == NULL) { + LOG("No match.\n"); + tokens->at = at; + DEDENT(); + return current; + } + LOG("Matched.\n"); + _kill_ptree(current); + _kill_ptree(tmp); + tokens->at = at; + DEDENT(); + return NULL; } else { LOG("unknown special type: %d\n", special.type); _kill_ptree(current); diff --git a/codetalker/c/parser.h b/codetalker/c/parser.h index 150b726..825cd06 100644 --- a/codetalker/c/parser.h +++ b/codetalker/c/parser.h @@ -64,7 +64,7 @@ enum RuleItemType { }; enum RuleSpecialType { - STAR, PLUS, QUESTION, OR, STRAIGHT, NOT, NOIGNORE + STAR, PLUS, QUESTION, OR, STRAIGHT, NOT, NOIGNORE, LOOKAHEAD, NOTAHEAD }; struct RuleSpecial { diff --git a/codetalker/cgrammar.pyx 
b/codetalker/cgrammar.pyx index d03c13e..705bb8a 100644 --- a/codetalker/cgrammar.pyx +++ b/codetalker/cgrammar.pyx @@ -125,7 +125,7 @@ cdef extern from "c/parser.h": LITERAL, RULE, TOKEN, SPECIAL enum RuleSpecialType: - STAR, PLUS, QUESTION, OR, STRAIGHT, NOIGNORE, NOT + STAR, PLUS, QUESTION, OR, STRAIGHT, NOT, NOIGNORE, LOOKAHEAD, NOTAHEAD struct RuleSpecial: RuleSpecialType type @@ -535,6 +535,10 @@ cdef RuleItem convert_item(object item, bint from_or=False): citem.value.special.type = QUESTION elif item[0] == '!': citem.value.special.type = NOT + elif item[0] == '@': + citem.value.special.type = LOOKAHEAD + elif item[0] == '~': + citem.value.special.type = NOTAHEAD elif item[0] == 'i': citem.value.special.type = NOIGNORE # TODO: implement non-greedy +? and *? diff --git a/codetalker/pgm/special.py b/codetalker/pgm/special.py index ebfe893..07799bd 100644 --- a/codetalker/pgm/special.py +++ b/codetalker/pgm/special.py @@ -27,6 +27,10 @@ class no_ignore(Special): char = 'i' class _not(Special): char = '!' +class lookahead(Special): + char = '@' +class notahead(Special): + char = '~' import string diff --git a/tests/parse/lookahead.py b/tests/parse/lookahead.py new file mode 100644 index 0000000..e1883d9 --- /dev/null +++ b/tests/parse/lookahead.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python + +from codetalker import pgm +from codetalker.pgm.tokens import ANY, ID, NUMBER, STRING, WHITE +from codetalker.pgm.special import lookahead, notahead +from codetalker.pgm.grammar import ParseError + +def sentence(rule): + rule | (subjects, "verb", objects, ["and", subjects, "verb", objects], ".") + +def subjects(rule): + rule | ("noun", ["and", "noun"], lookahead("verb")) + +def objects(rule): + rule | ("noun", ["and", "noun", notahead("verb")]) + +grammar = pgm.Grammar(start=sentence, tokens=[ID, WHITE, ANY], ignore=[WHITE]) + +def test_one(): + text = 'noun verb noun.' 
+ tree = grammar.process(text) + assert str(tree) == text + +def test_two(): + text = 'noun and noun verb noun.' + tree = grammar.process(text) + assert str(tree) == text + +def test_three(): + text = 'noun verb noun and noun.' + tree = grammar.process(text) + assert str(tree) == text + +def test_four(): + text = 'noun verb noun and noun verb noun.' + tree = grammar.process(text) + assert str(tree) == text + +if __name__ == '__main__': + for name, fn in globals().items(): + if name.startswith('test_'): + fn() + print 'test passed' + print 'Finished!' + +# vim: et sw=4 sts=4