Regular expressions parser
Python
Switch branches/tags
Nothing to show
Clone or download
Fetching latest commit…
Cannot retrieve the latest commit at this time.
Permalink
Failed to load latest commit information.
README.md
laucha.py

README.md

Laucha, Regular Expressions Parser

Given a regular expression this program builds its parsing tree. The parser is a manually written recursive descent parser from a BNF description of a RE syntax.

Example output:

$ python laucha.py 

(a|b)*aab
[('special', '('),
 ('literal', 'a'),
 ('special', '|'),
 ('literal', 'b'),
 ('special', ')'),
 ('special', '*'),
 ('literal', 'a'),
 ('literal', 'a'),
 ('literal', 'b'),
 ('eos', None)]
('RE',
 ('RE',
  ('simple_RE',
   ('basic_RE',
    ('star',
     ('elementary_RE',
      ('group',
       ('special', '('),
       ('RE',
        ('simple_RE',
         ('basic_RE', ('elementary_RE', ('char', ('literal', 'a'))))),
        ('union',
         ('special', '|'),
         ('simple_RE',
          ('basic_RE', ('elementary_RE', ('char', ('literal', 'b'))))))),
       ('special', ')'))),
     ('special', '*'))),
   ('concatenation',
    ('simple_RE',
     ('basic_RE', ('elementary_RE', ('char', ('literal', 'a')))),
     ('concatenation',
      ('simple_RE',
       ('basic_RE', ('elementary_RE', ('char', ('literal', 'a')))),
       ('concatenation',
        ('simple_RE',
         ('basic_RE', ('elementary_RE', ('char', ('literal', 'b')))))))))))),
 ('eos', None))

([cC]at)|([dD]og)
[('special', '('),
 ('special', '['),
 ('literal', 'c'),
 ('literal', 'C'),
 ('special', ']'),
 ('literal', 'a'),
 ('literal', 't'),
 ('special', ')'),
 ('special', '|'),
 ('special', '('),
 ('special', '['),
 ('literal', 'd'),
 ('literal', 'D'),
 ('special', ']'),
 ('literal', 'o'),
 ('literal', 'g'),
 ('special', ')'),
 ('eos', None)]
('RE',
 ('RE',
  ('simple_RE',
   ('basic_RE',
    ('elementary_RE',
     ('group',
      ('special', '('),
      ('RE',
       ('simple_RE',
        ('basic_RE',
         ('elementary_RE',
          ('set',
           ('positive_set',
            ('special', '['),
            ('set_items',
             ('set_item', ('char', ('literal', 'c'))),
             ('set_items',
              ('set_item', ('char', ('literal', 'C'))))),
            ('special', ']'))))),
        ('concatenation',
         ('simple_RE',
          ('basic_RE', ('elementary_RE', ('char', ('literal', 'a')))),
          ('concatenation',
           ('simple_RE',
            ('basic_RE',
             ('elementary_RE', ('char', ('literal', 't')))))))))),
      ('special', ')'))))),
  ('union',
   ('special', '|'),
   ('simple_RE',
    ('basic_RE',
     ('elementary_RE',
      ('group',
       ('special', '('),
       ('RE',
        ('simple_RE',
         ('basic_RE',
          ('elementary_RE',
           ('set',
            ('positive_set',
             ('special', '['),
             ('set_items',
              ('set_item', ('char', ('literal', 'd'))),
              ('set_items',
               ('set_item', ('char', ('literal', 'D'))))),
             ('special', ']'))))),
         ('concatenation',
          ('simple_RE',
           ('basic_RE',
            ('elementary_RE', ('char', ('literal', 'o')))),
           ('concatenation',
            ('simple_RE',
             ('basic_RE',
              ('elementary_RE', ('char', ('literal', 'g')))))))))),
       ('special', ')'))))))),
 ('eos', None))