Skip to content

Commit

Permalink
Merge pull request #71 from goto40/master-regexp
Browse files Browse the repository at this point in the history
Regular expression with group support
  • Loading branch information
igordejanovic committed Jul 8, 2018
2 parents 2439126 + d09ef02 commit 9f85d62
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 2 deletions.
22 changes: 22 additions & 0 deletions docs/grammar.md
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,28 @@ There are two types of match expressions:
For more information on Regular Expression in Python see [Regular Expression
HOWTO](https://docs.python.org/3/howto/regex.html).

When the metamodel is created with the option use_regexp_group enabled
(use_regexp_group=True), the value matched by a regular expression with
exactly one group is replaced by the content of that group. This can be used
to define multiline strings that are stored in the model without the
surrounding delimiters:

Model: 'data' '=' data=/(?ms)\"{3}(.*?)\"{3}/;

An example model could be

data = """
This is a multiline
text!
"""

When creating a metamodel with this grammar and the option
use_regexp_group enabled, a multiline string delimited with '"""'
is accepted: "(?ms)" activates the multiline option and the "dot matches
everything" option. "\"{3}" matches the delimiter '"""'. The pattern
"(.*?)" is a non-greedy variant of "get anything"; because it is the only
group in the expression, its content is what gets stored in the model.

metamodel = metamodel_from_str(grammar, use_regexp_group=True)


### Sequence

Expand Down
73 changes: 73 additions & 0 deletions tests/functional/test_regexp_with_groups.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
"""
Testing model and regexp with groups.
"""
from __future__ import unicode_literals
import pytest # noqa
import sys
from textx import metamodel_from_str

# Alias for the native text type: ``unicode`` on Python 2, ``str`` on
# Python 3.  The structured ``sys.version_info`` tuple is compared instead of
# the free-form ``sys.version`` string, whose lexicographic ordering breaks
# for multi-digit major versions (e.g. '10.0' < '3' is True).
if sys.version_info[0] < 3:
    text = unicode  # noqa
else:
    text = str

# Grammars used by the tests below.
#
# Raw strings are used so that the regex escapes (\", \s, \-, \w) reach the
# grammar parser unchanged instead of relying on Python leaving unknown
# string escapes alone (which emits a warning on modern interpreters).
#
# The inline flag group "(?ms)" is placed at the very start of each pattern:
# Python's ``re`` module deprecated global flags in the middle of an
# expression in 3.6 and rejects them from 3.11 on.

# One regexp group: eligible for group replacement with use_regexp_group.
grammar = r"""
Model: entries += Entry;
Entry:
    'data' '=' data=/(?ms)\"{3}(.*?)\"{3}/
;
"""

# Two regexp groups: group replacement must NOT be applied.
grammar2 = r"""
Model:
    'data' '=' data=/(?ms)\"{3}(.*?)\"{3}\s*\-(\w+)\-/
;
"""

def test_regexp_with_groups_deactivated():
    """
    Without use_regexp_group the whole regexp match is stored in the model,
    i.e. the surrounding triple quotes are kept.
    """
    model_str = '''
data = """
This is a multiline
text!
"""
'''

    # Default metamodel: the use_regexp_group option is not passed.
    model = metamodel_from_str(grammar).model_from_str(model_str)
    stored = model.entries[0].data

    assert '"""' in stored  # delimiters are still part of the value
    assert 'This' in stored  # start of the payload is present
    assert 'text!' in stored  # end of the payload is present

def test_regexp_with_groups_activated():
    """
    With use_regexp_group=True a regexp containing exactly one group is
    replaced by the content of that group, so the surrounding triple quotes
    are stripped from the stored value.
    """
    model_str = '''
data = """
This is a multiline
text!
"""
data="""second text"""
'''

    metamodel = metamodel_from_str(grammar, use_regexp_group=True)
    m = metamodel.model_from_str(model_str)

    # Both entries of the model text must have been parsed.
    assert len(m.entries) == 2

    first = m.entries[0].data
    assert '"""' not in first  # delimiters are removed by the group
    assert 'This' in first  # start of the payload is kept
    assert 'text!' in first  # end of the payload is kept

    # The single-line entry is reduced to exactly the group content.
    assert m.entries[1].data == 'second text'

def test_regexp_with_groups_activated2():
    """
    A regexp with two groups is not eligible for group replacement, even
    when use_regexp_group is enabled: the complete match is stored.
    """
    model_str = '''data = """This is a multiline"""-ExtraInfo-'''

    mm = metamodel_from_str(grammar2, use_regexp_group=True)
    model = mm.model_from_str(model_str)

    # The full matched text, delimiters and suffix included, is kept.
    assert model.data == '"""This is a multiline"""-ExtraInfo-'
5 changes: 4 additions & 1 deletion textx/metamodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,12 +101,14 @@ class TextXMetaModel(DebugPrinter):
should interact), this attribute must be set via an optional
constructor parameter "global_repository=True" or
"global_repository=GlobalModelRepository()".
use_regexp_group (bool): if True, regexp terminals are
replaced with the group value, if they have exactly one group.
"""

def __init__(self, file_name=None, classes=None, builtins=None,
auto_init_attributes=True, ignore_case=False, skipws=True,
ws=None, autokwd=False, memoization=False,
textx_tools_support=False, **kwargs):
textx_tools_support=False, use_regexp_group=False, **kwargs):
# evaluate optional parameter "global_repository"
global_repository = kwargs.pop("global_repository", False)
if global_repository:
Expand Down Expand Up @@ -136,6 +138,7 @@ def __init__(self, file_name=None, classes=None, builtins=None,
self.autokwd = autokwd
self.memoization = memoization
self.textx_tools_support = textx_tools_support
self.use_regexp_group = use_regexp_group

# Registered model processors
self._model_processors = []
Expand Down
11 changes: 10 additions & 1 deletion textx/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,16 @@ def process_match(nt):

def process_node(node):
if isinstance(node, Terminal):
return convert(node.value, node.rule_name)
from arpeggio import RegExMatch
if metamodel.use_regexp_group and \
isinstance(node.rule, RegExMatch):
if node.rule.regex.groups == 1:
value = node.extra_info.group(1)
return convert(value, node.rule_name)
else:
return convert(node.value, node.rule_name)
else:
return convert(node.value, node.rule_name)

assert node.rule.root, \
"Not a root node: {}".format(node.rule.rule_name)
Expand Down

0 comments on commit 9f85d62

Please sign in to comment.