# Hiena Multipass Parser

    hiena_mp()

The Hiena parser takes a Grammar and a Target, and generates a Dictionary tree.

    def hiena_mp(grammar, target, rulenam) -> dict:
        ...

The **target** can be a string, stream or (map, dict) pair.

## Multi-pass Parser

A multi-pass recursive-descent implementation.


In [4]:
import re
import json

LINE = '^.+$'
WORD = '\w+'
CHAR = '\w'
fieldschema = [
    'spec', 'file', 'vfstype', 
    'mntopts', 'freq', 'passno'
]
fstabg = {
    "entry": [LINE, "field" ],
    "field": [WORD, "", fieldschema],
    "char":  [CHAR, ""]
}


def hiena_mp(g, text,rulename):
    schema = list()
    tree = list()
    if rulename in g:
        rule = g[rulename]
        m = re.finditer(rule[0], 
                        text,
                        re.M
                       )
        try:
            schema = rule[2]
        except:
            pass
        nextrulename = rule[1]
        if nextrulename != "":
            for ea in m:
                tree.append(hiena_mp(
                      g, 
                      ea.group(0), 
                      nextrulename
                     ))
        else:
            for ea in m:
                tree.append(ea.group(0))
        try:
            labels = rule[2]
        except:
            return tree
        
        tree = { k:v for k,v 
                in zip(labels, 
                       tree
                      )}
        return tree

sample = """
one two three four
five six seven eight 23 4
"""
        
x = hiena_mp(fstabg,sample,"entry")
print(json.dumps(x))

[{"spec": "one", "file": "two", "vfstype": "three", "mntopts": "four"}, {"spec": "five", "file": "six", "vfstype": "seven", "mntopts": "eight", "freq": "23", "passno": "4"}]


In [3]:
a = [ x+1 for x in (1,2,36)]
a

[1, 2, 36]

In [2]:
import re

line = '^.+$'
word = '\w+'
flags = re.M

entry = [[word,flags],[1,2],dict()]
field = [[word,flags]
    ['spec', 
     'file', 
     'vfstype', 
     'mntopts',
     'freq',
     'passno'], 
     {'freq':'[0-9]+', 
      'passno':'[0-9]+'
     }
]

sample = """
one two three four 1 0
five six seven eight 23 4
"""

def parse(g, text):
    lex = g[0][0]
    fl  = g[0][1] 
    labels = g[1]
    sublex = g[2]
    m = re.findall(lex,text,flags)
    w = { k:v for k,v in zip(labels,m)}
    print(w)
    for ea in sublex:
        if ea in w:
            print(re.match(sublex[ea],w[ea],0))

parse(entry, sample)
parse(field, sample)

  field = [[word,flags]


TypeError: list indices must be integers or slices, not tuple

In [9]:
for i in 1-10:
    print(i)

SyntaxError: invalid syntax (<ipython-input-9-d3561bc18e6e>, line 1)

In [1]:
import re

ruleref_re = re.compile("(?P<surface_id>[/@]?)(?P<rule_id>[^{} ]+)(?P<qty>[{][*][}])?")

class HienaParser:
    def __init__(self, 
                 target=None,
                 grammar=None,
                ):
        self.target = target
        self.grammar = grammar
    
    def run_lex(self, 
                text:str,
                regex_args:list,
                ) -> list:
        if not type(regex_args) is list:
            raise TypeError('Requires a list of args suitable for re.findall()')
        _re = regex_args[0]
        try:
            _flags = regex_args[1]
        except:
            _flags = 0
        return re.findall(_re, text, _flags)
        
    def parse_rule_reference(self,ref):
        m = re.match(ruleref_re,ref,0)
        surfaceid = ''
        args = [ ref ]
        return (surfaceid,args)
        
    def run_rule(self, 
                 target:str = None,
                 rulename:str = "",
                 quantity:str = "*",
                ):
        rulepart = self.grammar[rulename]
        print('rulepart: '+rulepart)
        # for first element
        i = 0
        try:
            self.run_lex(rulepart[0])
            i += 1
        except:
            pass  

        def process_rulebody(e):
            try:
                a = self.parse_rule_reference(e)
                res = self.run_rule(*a[1])
            except:
                raise
                
        if type(rulepart) is str:
            process_rulebody(rulepart)
            
        if type(rulepart) is list:
            # loop over rulebody
            for e in rulepart[i:]:
                print(e)
                process_rulebody(e)
        
    def run(self):
        startname = self.grammar["$__start__"]
        return self.run_rule(self.target, 
                             rulename=startname
                          )
                        
def hiena1(target: str, grammar: dict) -> (map, dict):
    mapp = { k: re.findall(grammar[k],
                 target)
            for k in grammar }
    dirr = None
    return (mapp,dirr)

def hiena(target: str, grammar: dict) -> (map, dict):
    parser = HienaParser(target,grammar)
    return parser.run()
    

In [21]:
import re
surface_id = "(?P<surface_id>[/@]?)"
rule_id = "(?P<rule_id>[^{}+*? ]+)"
qty = "(?P<qty>[+*?]|(?:[{][1-9]+[}]))?"
carver = "([{][^{}]*[}])"
carvers = "(?:\W*"+carver+")+"
_="\W*"+carver
ruleref = surface_id+rule_id+qty+carvers
quantifier = "([*])|([1-9])|([^*, ]+)"
ruleref_re = re.compile(ruleref)
quantifier_re = re.compile(quantifier)

In [18]:
import re
fstabGrammar = {
    "$__start__": "fstab",
    "fstab": [ "/entry+" ],
    "entry": [ "ENTRY{2}"
               "{@field+:spec,file,vfstype,mntopts,freq:digit,passno:digit} {other} {such}" ], 
    "ENTRY": [[r"^[^#\n]+", re.M]],
    "field": [[r"[^# ]+"]],
    "digit": [[r"[0-9]"]], 
    
    " " : " "
}

In [9]:
print(fstabGrammar['entry'][0])

ENTRY{@field+:spec,file,vfstype,mntopts,freq:digit,passno:digit} {other} {such}


In [24]:
a = re.search( 
    ruleref_re,
    fstabGrammar['entry'][0], 
    0 
)
print(a.groups())

b = re.findall( 
    carver,
    fstabGrammar['entry'][0], 
    0 
)
print(b)

('', 'ENTRY', '{2}', '{such}')
[('', 'ENTRY', '{2}', '{such}')]


In [37]:
from fs.osfs import OSFS
from Dcel import Dcel

d = Dcel(address='fs', 
         service_class=OSFS
        )
text = d.path_lookup('.cosm/etc/fstab').value


In [None]:
s = '/entry{*}'
print(s[0])
print(s[1:].split('{')[1][0])
print()

strings = [ '/entry{*}', 
           '/entry',
           'entry{*}',
           'entry'
          ]

import re
for s in strings:
    c = re.compile("(?P<surface_id>[/@]?)(?P<rule_id>[^{} ]+)(?P<qty>[{][*][}])?")
    m = re.match(c,s,0)
    print(m.groupdict())

In [None]:
res = hiena(text,fstabGrammar)

In [None]:
print(res)

In [None]:
grammar = {"word": r"[^ ]+"}
hiena("one two three", grammar)

In [None]:
g = {'fs_entry': r"(.+)\n"}
d = """
sftp://example.com  /  sftpfs

localhost:/example  /  file

files.example.com   /  webdavfs
"""
hiena(d,g)