In [1]:
# --- This is not needed if NXP is installed --- #
import sys
import os.path as op

# add the src/ directory to the Python path
# (the relative path is resolved against the current working directory, and the
# entry is placed first in sys.path so it is searched before any other location)
sys.path.insert(0,op.realpath('../src'))
import nxp

## Fenced expressions

### Identical boundaries

In [2]:
# change an option to see how it affects the results
p = nxp.Fenced('|', esc=True, empty=False)

# The backslash is written as '\\' so that the text contains a single literal
# backslash; '\|' is not a valid escape sequence and triggers a warning on
# recent Python versions.
c = nxp.make_cursor('normal |foo|, non-empty || |, escaped |\\||')

# show each match underlined within its surrounding text
for m in p.findall(c):
    print(m.insitu(c.buffer))

normal |foo|, non-empty |
       -----             
, non-empty || |, escaped |\|
             ---             
| |, escaped |\||
             ----


- With `empty=False`, the non-empty segment is matched as `| |`, and **not** `|| |`. The first `|` is consumed by the cursor as a failed match, because `||` is invalid.

- The results with `empty=True` are the same regardless of `esc`. In particular with `esc=True`, the last match is `||`, even though it is _preceded_ by `\`. Since the backslash is outside the match, it is not treated as an escape character.

### Different L/R boundaries

In [3]:
# distinct opening and closing boundaries are given as a pair
p = nxp.Fenced(('(', ')'), esc=True, empty=False)

# The backslash is written as '\\' so that the text contains a single literal
# backslash; '\)' is not a valid escape sequence and triggers a warning on
# recent Python versions.
c = nxp.make_cursor('normal (foo), non-empty ()), escaped (\\))')

# show each match underlined within its surrounding text
for m in p.findall(c):
    print(m.insitu(c.buffer))

normal (foo), non-empty (
       -----             
()), escaped (\))
             ----


### Word boundaries

In [4]:
# boundaries spanning several characters
p = nxp.Fenced(('\\left', '\\right'), esc=False, empty=True)
c = nxp.make_cursor(
    'normal \\leftfoo\\right, empty \\left\\right\\right, partial \\left\\ri\\right'
)

# show each match underlined within its surrounding text
for m in p.findall(c):
    print(m.insitu(c.buffer))

normal \leftfoo\right, empty \left
       --------------             
right, empty \left\right\right, parti
             -----------             
ght, partial \left\ri\right
             --------------


Fenced expressions whose boundaries are longer than a single character cannot use `esc=True`.

## Multiplicities

In [5]:
# multiplicity specifications in the various accepted forms
test = [
    1, 2, '1', '1-3', '2-5', '4+', '5-',
    (3, 7), [1, '2-5'], range(3, 7, 2),
]

# print each specification next to the result of parsing it
for t in test:
    parsed = nxp.mulparse(t)
    print('%s: %s' % (t, parsed))


1: [(1, 1)]
2: [(2, 2)]
1: [(1, 1)]
1-3: [(1, 3)]
2-5: [(2, 5)]
4+: [(4, inf)]
5-: [(0, 5)]
(3, 7): [(3, 7)]
[1, '2-5']: [(1, 1), (2, 5)]
range(3, 7, 2): range(3, 7, 2)


## HTML tags

In [6]:
from nxp import Seq, String, Either, Any

# property name, optionally assigned a value between quotes
attr_value = Seq([r'\s*=\s*', String()])
attr = Seq([r'\s+(\w+)', attr_value], skip=1)

# open/close tags, or self-closed tag
opening_tag = Seq([r'<(\w+)', Any(attr), r'\s*/?>'])
closing_tag = r'</(\w+)\s*>'
tag = Either(opening_tag, closing_tag)

# create cursor over the space-joined sample snippets
samples = [
    'Not <a><tag</a>',
    '<input type="checkbox" value="42" checked>',
    '<img src="foo/bar.jpg" />',
]
cur = nxp.make_cursor(' '.join(samples))

# find matches and show each one underlined within its surrounding text
for m in tag.findall(cur):
    print(m.insitu(cur.buffer))

Not <a><tag</a> <inp
    ---             
Not <a><tag</a> <input type=
           ----             
 <a><tag</a> <input type="checkbox" value="42" checked> <img src="fo
             ------------------------------------------             
42" checked> <img src="foo/bar.jpg" />
             -------------------------
