Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New special tokens: lookahead and notahead #12

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions HOWTO.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ Defining Rules
- ``star(zero, or_more)``
- ``plus(one, or_more)``
- ``_or(one, of, these)``
- ``lookahead(this, stuff)`` # checks the content, but without consuming its tokens.
- ``notahead(this, stuff)`` # checks the content without consuming any tokens, and succeeds only if the content *does not* match.
- ``_not(this, stuff)`` # checks the content, and if it *does not* match,
consumes **a single** token.
- ``TOKEN_NAME``
Expand Down
32 changes: 32 additions & 0 deletions codetalker/c/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,38 @@ struct cParseNode* check_special(unsigned int rule, struct RuleSpecial special,
_kill_ptree(current);
DEDENT();
return NULL;
} else if (special.type == LOOKAHEAD) {
LOG("LOOKAHEAD\n");
at = tokens->at;
tmp = parse_children(rule, special.option, grammar, tokens, error);
if (tmp == NULL) {
LOG("No match.\n");
_kill_ptree(current);
tokens->at = at;
DEDENT();
return NULL;
}
LOG("Matched.\n");
_kill_ptree(tmp);
tokens->at = at;
DEDENT();
return current;
} else if (special.type == NOTAHEAD) {
LOG("NOTAHEAD\n");
at = tokens->at;
tmp = parse_children(rule, special.option, grammar, tokens, error);
if (tmp == NULL) {
LOG("No match.\n");
tokens->at = at;
DEDENT();
return current;
}
LOG("Matched.\n");
_kill_ptree(current);
_kill_ptree(tmp);
tokens->at = at;
DEDENT();
return NULL;
} else {
LOG("unknown special type: %d\n", special.type);
_kill_ptree(current);
Expand Down
2 changes: 1 addition & 1 deletion codetalker/c/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ enum RuleItemType {
};

enum RuleSpecialType {
STAR, PLUS, QUESTION, OR, STRAIGHT, NOT, NOIGNORE
STAR, PLUS, QUESTION, OR, STRAIGHT, NOT, NOIGNORE, LOOKAHEAD, NOTAHEAD
};

struct RuleSpecial {
Expand Down
6 changes: 5 additions & 1 deletion codetalker/cgrammar.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ cdef extern from "c/parser.h":
LITERAL, RULE, TOKEN, SPECIAL

enum RuleSpecialType:
STAR, PLUS, QUESTION, OR, STRAIGHT, NOIGNORE, NOT
STAR, PLUS, QUESTION, OR, STRAIGHT, NOT, NOIGNORE, LOOKAHEAD, NOTAHEAD

struct RuleSpecial:
RuleSpecialType type
Expand Down Expand Up @@ -535,6 +535,10 @@ cdef RuleItem convert_item(object item, bint from_or=False):
citem.value.special.type = QUESTION
elif item[0] == '!':
citem.value.special.type = NOT
elif item[0] == '@':
citem.value.special.type = LOOKAHEAD
elif item[0] == '~':
citem.value.special.type = NOTAHEAD
elif item[0] == 'i':
citem.value.special.type = NOIGNORE
# TODO: implement non-greedy +? and *?
Expand Down
4 changes: 4 additions & 0 deletions codetalker/pgm/special.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ class no_ignore(Special):
char = 'i'
# Special marked by '!'; convert_item in cgrammar.pyx maps a leading '!'
# to the NOT special type.
class _not(Special):
char = '!'
# Special marked by '@'; convert_item in cgrammar.pyx maps a leading '@'
# to the LOOKAHEAD special type.
class lookahead(Special):
char = '@'
# Special marked by '~'; convert_item in cgrammar.pyx maps a leading '~'
# to the NOTAHEAD special type.
class notahead(Special):
char = '~'

import string

Expand Down
46 changes: 46 additions & 0 deletions tests/parse/lookahead.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/usr/bin/env python

from codetalker import pgm
from codetalker.pgm.tokens import ANY, ID, NUMBER, STRING, WHITE
from codetalker.pgm.special import lookahead, notahead
from codetalker.pgm.grammar import ParseError

def sentence(rule):
    """Top-level rule: subjects, "verb", objects, a bracketed second clause
    introduced by "and", and a closing ".".

    NOTE(review): the bracketed list is presumably an optional group
    (test_one parses a sentence without it) -- confirm against the grammar docs.
    """
    second_clause = ["and", subjects, "verb", objects]
    rule | (subjects, "verb", objects, second_clause, ".")

def subjects(rule):
    """Subject phrase: "noun", a bracketed ("and", "noun") group, then a
    lookahead on "verb".

    NOTE(review): the bracketed list is presumably an optional group -- confirm.
    """
    extra_noun = ["and", "noun"]
    rule | ("noun", extra_noun, lookahead("verb"))

def objects(rule):
    """Object phrase: "noun", then a bracketed ("and", "noun") group that
    ends with a notahead on "verb".

    NOTE(review): the bracketed list is presumably an optional group -- confirm.
    """
    extra_noun = ["and", "noun", notahead("verb")]
    rule | ("noun", extra_noun)

# Grammar under test: starts at `sentence`, tokenizes with ID, WHITE and ANY,
# and passes WHITE as the ignore list.
grammar = pgm.Grammar(start=sentence, tokens=[ID, WHITE, ANY], ignore=[WHITE])

def test_one():
    """One subject noun and one object noun; str() of the tree must equal the input."""
    source = 'noun verb noun.'
    parsed = grammar.process(source)
    assert str(parsed) == source

def test_two():
    """Two subject nouns joined by "and"; str() of the tree must equal the input."""
    source = 'noun and noun verb noun.'
    parsed = grammar.process(source)
    assert str(parsed) == source

def test_three():
    """Two object nouns joined by "and"; str() of the tree must equal the input."""
    source = 'noun verb noun and noun.'
    parsed = grammar.process(source)
    assert str(parsed) == source

def test_four():
    """Two full clauses joined by "and"; str() of the tree must equal the input."""
    source = 'noun verb noun and noun verb noun.'
    parsed = grammar.process(source)
    assert str(parsed) == source

if __name__ == '__main__':
    # Script entry point: call every module-level name starting with 'test_'.
    # Iterate over a snapshot of globals(): at module scope the loop variables
    # are themselves globals, so binding them would resize the live dict
    # mid-iteration (a RuntimeError on Python 3).
    for name, fn in list(globals().items()):
        if name.startswith('test_'):
            fn()
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('test passed')
    print('Finished!')

# vim: et sw=4 sts=4