###  2.19 编写一个简单的递归下降解析器

In [2]:
import re
import collections

# Token specification: every pattern is wrapped in a named group so that
# ``match.lastgroup`` reports which kind of token was recognised.
NUM = r'(?P<NUM>\d+)'
PLUS = r'(?P<PLUS>\+)'
MINUS = r'(?P<MINUS>-)'
TIMES = r'(?P<TIMES>\*)'
DIVIDE = r'(?P<DIVIDE>/)'
LPAREN = r'(?P<LPAREN>\()'
RPAREN = r'(?P<RPAREN>\))'
WS = r'(?P<WS>\s+)'

# Single alternation of all token patterns, tried at each scan position.
master_pat = re.compile('|'.join([NUM, PLUS, MINUS, TIMES,
                                  DIVIDE, LPAREN, RPAREN, WS]))

# Tokenizer
# A token is a (type, value) pair: the group name and the matched text.
Token = collections.namedtuple('Token', ['type', 'value'])


def genetate_tokens(text):
    """Yield the non-whitespace tokens of *text* in order.

    Each item is a ``Token(type, value)`` where ``type`` is the name of the
    group in ``master_pat`` that matched and ``value`` is the matched text.
    Whitespace tokens are recognised but not yielded.

    Raises:
        SyntaxError: if a character that matches no token pattern is
            reached. The error is raised lazily, when iteration arrives
            at the offending position.
    """
    scanner = master_pat.scanner(text)
    end = 0  # index just past the last successfully matched token
    for m in iter(scanner.match, None):
        end = m.end()
        if m.lastgroup != 'WS':
            yield Token(m.lastgroup, m.group())
    # scanner.match() returns None both at end of input and at the first
    # character no pattern accepts; only the former is a clean finish.
    if end != len(text):
        raise SyntaxError(
            'Unexpected character {!r} at index {}'.format(text[end], end))

In [5]:
# Parser
class ExpressionEvaluator:
    '''
    Implementation of a recursive descent parser. Each method
    implements a single grammar rule. Use the ._accept() method
    to test and accept the current lookahead token. Use the ._expect()
    method to exactly match and discard the next token on the input
    (or raise a SyntaxError if it doesn't match).
    '''

    def parse(self, text):
        '''
        Tokenize text and return whatever the expr() rule produces for it.

        Raises SyntaxError if a grammar rule fails to match or if tokens
        remain after a complete expression has been parsed.
        '''
        self.tokens = genetate_tokens(text)
        self.tok = None              # last symbol consumed
        self.nexttok = None          # next symbol tokenized
        self._advance()              # Load first lookahead token
        result = self.expr()
        # The whole input must be consumed; otherwise text such as '2 3'
        # or '1 + 2)' would silently evaluate only a leading prefix.
        if self.nexttok is not None:
            raise SyntaxError('Unexpected token ' + repr(self.nexttok.value))
        return result

    def _advance(self):
        'Advance one token ahead'
        # nexttok becomes None once the token stream is exhausted.
        self.tok, self.nexttok = self.nexttok, next(self.tokens, None)

    def _accept(self, toktype):
        'Test and consume the next token if it matches toktype'
        if self.nexttok and self.nexttok.type == toktype:
            self._advance()
            return True
        return False

    def _expect(self, toktype):
        'Consume next token if it matches toktype or raise SyntaxError'
        if not self._accept(toktype):
            raise SyntaxError('Expected ' + toktype)

    # Grammar rules follow

    def expr(self):
        "expression ::= term { ('+' | '-') term }*"
        exprval = self.term()
        while self._accept('PLUS') or self._accept('MINUS'):
            op = self.tok.type       # operator that _accept just consumed
            right = self.term()
            if op == 'PLUS':
                exprval += right
            elif op == 'MINUS':
                exprval -= right
        return exprval

    def term(self):
        "term ::= factor { ('*' | '/') factor }*"
        termval = self.factor()
        while self._accept('TIMES') or self._accept('DIVIDE'):
            op = self.tok.type       # operator that _accept just consumed
            right = self.factor()
            if op == 'TIMES':
                termval *= right
            elif op == 'DIVIDE':
                termval /= right     # true division: the result is a float
        return termval

    def factor(self):
        "factor ::= NUM | ( expr )"
        if self._accept('NUM'):
            return int(self.tok.value)
        elif self._accept('LPAREN'):
            exprval = self.expr()
            self._expect('RPAREN')
            return exprval
        else:
            raise SyntaxError('Expected NUMBER or LPAREN')
        
        

In [6]:
e = ExpressionEvaluator()
e.parse('2 + (3 + 4) * 5')

37

        如果想做的不只是纯粹的计算，就需要修改ExpressionEvaluator类来实现。
        如下实现构建了一棵简单的解析树。

In [None]:
class ExpressionTreeBuilder(ExpressionEvaluator):
    '''
    Variant of ExpressionEvaluator that builds a parse tree instead of
    computing a value. Leaves are ints; every operator node is a tuple
    (op, left, right) where op is one of '+', '-', '*', '/'.
    '''

    def expr(self):
        "expression ::= term { ('+' | '-') term }"
        exprval = self.term()
        while self._accept('PLUS') or self._accept('MINUS'):
            op = self.tok.type       # operator that _accept just consumed
            right = self.term()
            if op == 'PLUS':
                exprval = ('+', exprval, right)
            elif op == 'MINUS':
                exprval = ('-', exprval, right)
        # Return only after the loop: returning inside it would stop after
        # the first operator and yield None for an operator-free expression.
        return exprval

    def term(self):
        "term ::= factor { ('*' | '/') factor }"
        termval = self.factor()
        while self._accept('TIMES') or self._accept('DIVIDE'):
            op = self.tok.type       # operator that _accept just consumed
            right = self.factor()
            if op == 'TIMES':
                termval = ('*', termval, right)
            elif op == 'DIVIDE':
                termval = ('/', termval, right)
        return termval

    def factor(self):
        "factor ::= NUM | ( expr )"
        if self._accept('NUM'):
            return int(self.tok.value)
        elif self._accept('LPAREN'):
            exprval = self.expr()
            self._expect('RPAREN')
            return exprval
        else:
            raise SyntaxError('Expected NUMBER or LPAREN')

用ply解析工具解析计算器表达式

In [22]:
from ply.lex import lex
from ply.yacc import yacc

# Token list: the names of every token type the lexer may produce.
# Each name has a matching t_<NAME> rule below.
tokens = ['NUM', 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'LPAREN', 'RPAREN']

# Ignored characters (space, tab and newline are skipped between tokens)

t_ignore = ' \t\n'

# Token specifications (as regexes); simple tokens need only a pattern string
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_LPAREN = r'\('
t_RPAREN = r'\)'

# Token processing functions
def t_NUM(t):
    r'\d+'
    # The string above is the token's regular expression, not ordinary
    # documentation, so it must stay the first statement and unchanged.
    # Swap the matched digit text for its integer value and hand the
    # same token object back.
    numeric_value = int(t.value)
    t.value = numeric_value
    return t

# Error handler
def t_error(t):
    # Lexer error hook: report the offending character, then step over it
    # so scanning can continue. t.value holds the remaining unmatched
    # input, so t.value[0] is the bad character. skip() is a method of the
    # lexer, reached through t.lexer; the token itself has no skip().
    print('Bad character: {!r}'.format(t.value[0]))
    t.lexer.skip(1)
    
# Build the lexer from the tokens list and the t_* rules defined above
lexer = lex()

# Grammar rules and handler functions
def p_expr(p):
    """
    expr : expr PLUS term
         | expr MINUS term
    """
    # The docstring is the grammar rule read by yacc; the ':' must be a
    # separate whitespace-delimited word or the rule is rejected.
    # p[1] and p[3] are the operand values, p[2] is the operator text,
    # and the result of the rule is stored in p[0].
    if p[2] == '+':
        p[0] = p[1] + p[3]
    elif p[2] == '-':
        p[0] = p[1] - p[3]
        
def p_expr_term(p):
    '''
    expr : term
    '''
    # An expression consisting of a single term has that term's value.
    term_value = p[1]
    p[0] = term_value
    
def p_term(p):
    """
    term : term TIMES factor
         | term DIVIDE factor
    """
    # The docstring is the grammar rule read by yacc; the ':' must be a
    # separate whitespace-delimited word or the rule is rejected.
    # p[1] and p[3] are the operand values, p[2] is the operator text,
    # and the result of the rule is stored in p[0].
    if p[2] == '*':
        p[0] = p[1] * p[3]
    elif p[2] == '/':
        # Assignment, not comparison: '==' here would discard the quotient.
        p[0] = p[1] / p[3]
        
def p_term_factor(p):
    '''
    term : factor
    '''
    # The ':' in the rule above must be surrounded by whitespace.
    # A term consisting of a single factor has that factor's value.
    p[0] = p[1]
    
def p_factor(p):
    '''
    factor : NUM
    '''
    # The ':' in the rule above must be surrounded by whitespace.
    # A NUM token's value was already converted to int by t_NUM.
    p[0] = p[1]

def p_factor_group(p):
    '''
    factor : LPAREN expr RPAREN
    '''
    # The ':' in the rule above must be surrounded by whitespace.
    # p[1] is '(' and p[3] is ')'; the value of the group is the inner
    # expression at p[2].
    p[0] = p[2]
    
def p_error(p):
    # Parser error hook: only reports that an error occurred; the
    # argument p is not inspected.
    print('Syntax error')
    
# Build the parser from the p_* grammar functions defined above
parser = yacc()

# Parse (and thereby evaluate) a sample expression
parser.parse('2+3')

ERROR: Rule 't_ignored' defined for an unspecified token ignored


TypeError: <module '__main__'> is a built-in module

### 2.20 在字节串上执行文本操作