In [9]:
import ply.lex as lex

class Lexer:
    """PLY-based lexical analyser for a small C-like language.

    The instance itself is handed to ``lex.lex(object=self)`` by
    :meth:`build`; PLY then picks up the ``tokens`` tuple, every ``t_*`` rule,
    ``t_ignore`` and ``t_error`` defined on this class.

    Typical use::

        lexer = Lexer(error_func)
        lexer.build()
        lexer.input(text)
        tok = lexer.token()
    """

    def __init__(self, error_func):
        """Create an un-built lexer.

        Args:
            error_func: callable invoked as ``error_func(msg, line, column)``
                whenever an illegal character is found.
        """
        self.error_func = error_func
        self.filename = ''
        self.last_token = None

    def build(self, **kwargs):
        """Build the underlying PLY lexer; ``kwargs`` go straight to ``lex.lex``."""
        self.lexer = lex.lex(object=self, **kwargs)

    def reset_lineno(self):
        """Reset the line counter of the underlying lexer to 1."""
        self.lexer.lineno = 1

    def input(self, text):
        """Feed ``text`` to the underlying lexer."""
        self.lexer.input(text)

    def token(self):
        """Return the next token (remembered in ``last_token``)."""
        self.last_token = self.lexer.token()
        return self.last_token

    def find_tok_column(self, token):
        """Return the 1-based column of ``token`` within its line."""
        # rfind yields -1 when there is no preceding newline, which makes the
        # first character of the input land on column 1.
        last_cr = self.lexer.lexdata.rfind('\n', 0, token.lexpos)
        return token.lexpos - last_cr

    # Internal auxiliary methods
    def _error(self, msg, token):
        """Report ``msg`` at the token's location and skip one character."""
        line, column = self._make_tok_location(token)
        self.error_func(msg, line, column)
        self.lexer.skip(1)

    def _make_tok_location(self, token):
        """Return ``(line, column)`` for ``token``."""
        return (token.lineno, self.find_tok_column(token))

    keywords = (
        'FOR', 'IF', 'PRINT', 'ASSERT', 'BREAK', 'CHAR', 'ELSE',
        'INT', 'FLOAT', 'READ', 'RETURN', 'VOID', 'WHILE',
    )

    tokens = keywords + (
        # identifiers
        'ID',

        # constants
        'INT_CONST',
        'FLOAT_CONST',
        'CHAR_CONST',

        # string literals
        'STRING_LITERAL',

        # operators
        'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD',
        'OR', 'AND', 'NOT', 'ADDRESS', 'LT', 'LE',
        'GT', 'GE', 'EQ', 'NE',

        # assignments
        'EQUALS', 'TIMESEQUAL', 'DIVEQUAL',
        'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',

        # increment/decrement
        'PLUSPLUS', 'MINUSMINUS',

        # delimiters
        'LPAREN', 'RPAREN',     # ( )
        'LBRACKET', 'RBRACKET', # [ ]
        'LBRACE', 'RBRACE',     # { }
        'COMMA', 'SEMI',        # , ;
    )

    # map from the lower-case spelling found in source text to the token type
    keyword_map = {keyword.lower(): keyword for keyword in keywords}

    # regex for identifiers
    def t_ID(self, t):
        r'[a-zA-Z_][0-9a-zA-Z_]*'
        # Reserved words match the identifier pattern; retag them here.
        t.type = self.keyword_map.get(t.value, "ID")
        return t

    # regex for constants
    t_FLOAT_CONST = r'([0-9]*\.[0-9]+)|([0-9]+\.)'
    # Stop at the first unescaped closing quote. The previous greedy '.*'
    # swallowed everything up to the LAST quote on the line, so
    # "'a' + 'b'" became a single CHAR_CONST.
    t_CHAR_CONST = r"'(\\.|[^\\'\n])*'"
    t_INT_CONST = r'[0-9]+'

    # regex for string literals
    t_STRING_LITERAL = r'\".*?\"'

    # regex for operators
    t_PLUS = r'\+'
    t_MINUS = r'\-'
    t_TIMES = r'\*'
    t_DIVIDE = r'/'
    t_MOD = r'%'
    t_OR = r'\|\|'
    t_AND = r'&&'
    t_NOT = r'!'
    t_ADDRESS = r'&'
    t_LT = r'<'
    t_LE = r'<='
    t_GT = r'>'
    t_GE = r'>='
    t_EQ = r'=='
    t_NE = r'!='

    # regex for assignments
    t_EQUALS = r'='
    t_TIMESEQUAL = r'\*='
    t_DIVEQUAL = r'/='
    t_MODEQUAL = r'%='
    t_PLUSEQUAL = r'\+='
    t_MINUSEQUAL = r'\-='

    # regex for increment/decrement
    t_PLUSPLUS = r'\+\+'
    t_MINUSMINUS = r'\-\-'

    # regex for delimiters
    t_LPAREN = r'\('
    t_RPAREN = r'\)'
    t_LBRACKET = r'\['
    t_RBRACKET = r'\]'
    t_LBRACE = r'{'
    t_RBRACE = r'}'
    t_COMMA = r','
    t_SEMI = r';'

    # Newlines: produce no token, only keep the line counter up to date.
    def t_NEWLINE(self, t):
        r'\n+'
        t.lexer.lineno += t.value.count("\n")

    # Characters skipped between tokens.
    t_ignore = ' \t'

    # Block comments: discarded, but their newlines still count as lines.
    def t_comment(self, t):
        r'/\*(.|\n)*?\*/'
        t.lexer.lineno += t.value.count('\n')

    def t_error(self, t):
        """Report the offending character; ``_error`` then skips past it."""
        msg = "Illegal character %s" % repr(t.value[0])
        self._error(msg, t)

    # Scanner (used only for test)
    def scan(self, data):
        """Tokenise ``data`` and print every token."""
        self.lexer.input(data)
        while True:
            tok = self.lexer.token()
            if not tok:
                break
            print(tok)

In [10]:
__file__ = 'parser_at_a_glance.ipynb'    # only needed in the Jupyter context

def print_error(msg, x, y):
    """Print a lexical error message followed by its ``x:y`` position."""
    report = "Lexical error: %s at %d:%d" % (msg, x, y)
    print(report)
        
# Demo: build a lexer that reports errors through print_error and print the
# token stream of a sample expression.
l = Lexer(print_error)
l.build()
l.scan("a = 3 * 4 + 5")

LexToken(ID,'a',1,0)
LexToken(EQUALS,'=',1,2)
LexToken(INT_CONST,'3',1,4)
LexToken(TIMES,'*',1,6)
LexToken(INT_CONST,'4',1,8)
LexToken(PLUS,'+',1,10)
LexToken(INT_CONST,'5',1,12)


In [47]:
import ply.yacc as yacc

class Parser:
    """PLY/yacc parser for the language tokenised by ``Lexer``.

    PLY reads the grammar from the docstrings of the ``p_*`` methods, so those
    docstrings are part of the program.  Each action stores a plain
    tuple/list/str parse tree in ``p[0]``.
    """

    tokens = Lexer.tokens

    # Grammar entry point.  Without it PLY uses the first rule it finds, which
    # was ``empty`` - leaving every other production unreachable.
    start = 'program'

    # Lowest to highest binding.  Every operator that appears in the ambiguous
    # ``binary_expression`` rules needs an entry, otherwise the conflicts are
    # resolved by yacc's default (shift) instead of by operator precedence.
    precedence = (
        ('left', 'OR'),
        ('left', 'AND'),
        ('left', 'EQ', 'NE'),
        ('left', 'LT', 'LE', 'GT', 'GE'),
        ('left', 'PLUS', 'MINUS'),
        ('left', 'TIMES', 'DIVIDE', 'MOD'),
    )

    def __init__(self):
        """Build the lexer and the LALR tables for the grammar below."""
        def print_error(msg, x, y):
            print('Lexical error: %s at %d:%d' % (msg, x, y))
        self.lexer = Lexer(print_error)
        # The lexer must be built before it can tokenise anything.
        self.lexer.build()
        self.parser = yacc.yacc(module=self, write_tables=False)

    def parse(self, data):
        """Parse ``data`` and return the value of the start symbol.

        The parser's own lexer is passed explicitly; otherwise yacc falls back
        to whichever lexer was built last anywhere in the process.
        """
        return self.parser.parse(data, lexer=self.lexer)

    def p_program(self, p):
        """
        program : global_declaration_list
        """
        p[0] = p[1]

    def p_empty(self, p):
        """empty :"""
        p[0] = None

    def p_global_declaration_list(self, p):
        """
        global_declaration_list : global_declaration
                                | global_declaration_list global_declaration
        """
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_global_declaration(self, p):
        """
        global_declaration : function_definition
                           | declaration
        """
        p[0] = p[1]

    def p_declaration_list(self, p):
        """
        declaration_list : declaration
                         | declaration_list declaration
        """
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_declaration_list_opt(self, p):
        """
        declaration_list_opt : declaration_list
                             | empty
        """
        p[0] = p[1]

    def p_function_definition(self, p):
        """
        function_definition : type_specifier declarator declaration_list_opt compound_statement
                            | declarator declaration_list_opt compound_statement
        """
        if len(p) == 5:
            p[0] = (p[1], p[2], p[3], p[4])
        else:
            p[0] = (p[1], p[2], p[3])

    def p_identifier(self, p):
        """
        identifier : ID
        """
        p[0] = p[1]

    def p_string(self, p):
        """
        string : STRING_LITERAL
        """
        p[0] = p[1]

    def p_integer_constant(self, p):
        """
        integer_constant : INT_CONST
        """
        p[0] = p[1]

    def p_character_constant(self, p):
        """
        character_constant : CHAR_CONST
        """
        p[0] = p[1]

    def p_floating_constant(self, p):
        """
        floating_constant : FLOAT_CONST
        """
        p[0] = p[1]

    def p_type_specifier(self, p):
        """
        type_specifier : VOID
                       | CHAR
                       | INT
                       | FLOAT
        """
        p[0] = p[1]

    def p_declarator(self, p):
        """
        declarator : identifier
                   | LPAREN declarator RPAREN
                   | declarator LBRACKET constant_expression RBRACKET
        """
        if len(p) == 2:
            p[0] = p[1]
        elif len(p) == 4:
            # Parentheses only group; keep the inner declarator.
            p[0] = p[2]
        elif len(p) == 5:
            # NOTE(review): '+' only works while both the declarator and the
            # size expression are plain strings; a tuple-valued operand (e.g.
            # a compound size expression) would raise TypeError here.
            p[0] = p[1] + p[2] + p[3] + p[4]

    def p_declarator2(self, p):
        """
        declarator : declarator LBRACKET RBRACKET
                   | declarator LPAREN parameter_list RPAREN
        """
        if len(p) == 4:
            # NOTE(review): same string-only assumption as the sized array case.
            p[0] = p[1] + p[2] + p[3]
        elif len(p) == 5:
            p[0] = (p[1], p[3])

    def p_identifier_list(self, p):
        """
        identifier_list : identifier
                        | identifier_list identifier
        """
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_identifier_list_opt(self, p):
        """
        identifier_list_opt : empty
                            | identifier_list
        """
        p[0] = p[1]

    def p_declarator3(self, p):
        """
        declarator : declarator LPAREN identifier_list_opt RPAREN
        """
        p[0] = (p[1], p[3])

    def p_constant_expression(self, p):
        """
        constant_expression : binary_expression
        """
        p[0] = p[1]

    def p_binary_expression(self, p):
        """
        binary_expression : cast_expression
                          | binary_expression TIMES binary_expression
                          | binary_expression DIVIDE binary_expression
                          | binary_expression MOD binary_expression
                          | binary_expression PLUS binary_expression
                          | binary_expression MINUS binary_expression
                          | binary_expression LT binary_expression
                          | binary_expression LE binary_expression
                          | binary_expression GT binary_expression
                          | binary_expression GE binary_expression
                          | binary_expression EQ binary_expression
                          | binary_expression NE binary_expression
                          | binary_expression AND binary_expression
                          | binary_expression OR binary_expression
        """
        if len(p) == 2:
            # Single cast_expression: there is no operator to index.
            p[0] = p[1]
        else:
            p[0] = (p[2], p[1], p[3])

    def p_cast_expression(self, p):
        """
        cast_expression : unary_expression
                        | LPAREN type_specifier RPAREN cast_expression
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            # p[1]/p[3] are the parentheses; keep the type and the operand.
            p[0] = ('cast', p[2], p[4])

    def p_unary_expression(self, p):
        """
        unary_expression : postfix_expression
                         | PLUSPLUS unary_expression
                         | MINUSMINUS unary_expression
                         | unary_operator cast_expression
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = (p[1], p[2])

    def p_postfix_expression(self, p):
        """
        postfix_expression : primary_expression
                           | postfix_expression PLUSPLUS
                           | postfix_expression MINUSMINUS
                           | postfix_expression LBRACKET expression RBRACKET
        """
        if len(p) == 2:
            p[0] = p[1]
        elif len(p) == 3:
            p[0] = (p[1], p[2])
        else:
            p[0] = (p[1], p[3])

    def p_assignment_expression_list(self, p):
        """
        assignment_expression_list : assignment_expression
                                   | assignment_expression_list assignment_expression
        """
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_assignment_expression_list_opt(self, p):
        """
        assignment_expression_list_opt : empty
                                       | assignment_expression_list
        """
        p[0] = p[1]

    def p_postfix_expression2(self, p):
        """
        postfix_expression : postfix_expression LPAREN assignment_expression_list_opt RPAREN
        """
        p[0] = (p[1], p[3])

    def p_primary_expression(self, p):
        """
        primary_expression : identifier
                           | constant
                           | string
                           | LPAREN expression RPAREN
        """
        if len(p) == 2:  # identifier, constant or string
            p[0] = p[1]
        else:  # parenthesised expression
            p[0] = p[2]

    def p_constant(self, p):
        """
        constant : integer_constant
                 | character_constant
                 | floating_constant
        """
        p[0] = p[1]

    def p_expression(self, p):
        """
        expression : assignment_expression
                   | expression COMMA assignment_expression
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = (p[2], p[1], p[3])

    def p_assignment_expression(self, p):
        """
        assignment_expression : binary_expression
                              | unary_expression assignment_operator assignment_expression
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = (p[2], p[1], p[3])

    def p_assignment_operator(self, p):
        """
        assignment_operator : EQUALS
                            | TIMESEQUAL
                            | DIVEQUAL
                            | MODEQUAL
                            | PLUSEQUAL
                            | MINUSEQUAL
        """
        p[0] = p[1]

    def p_unary_operator(self, p):
        """
        unary_operator : ADDRESS
                       | TIMES
                       | PLUS
                       | MINUS
                       | NOT
        """
        p[0] = p[1]

    def p_parameter_list(self, p):
        """
        parameter_list : parameter_declaration
                       | parameter_list COMMA parameter_declaration
        """
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[3]]

    def p_parameter_declaration(self, p):
        """
        parameter_declaration : type_specifier declarator
        """
        p[0] = (p[1], p[2])

    def p_init_declarator_list(self, p):
        """
        init_declarator_list : init_declarator
                             | init_declarator_list init_declarator
        """
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_init_declarator_list_opt(self, p):
        """
        init_declarator_list_opt : empty
                                 | init_declarator_list
        """
        p[0] = p[1]

    def p_declaration(self, p):
        """
        declaration : type_specifier init_declarator_list_opt SEMI
        """
        p[0] = (p[1], p[2])

    def p_init_declarator(self, p):
        """
        init_declarator : declarator
                        | declarator EQUALS initializer
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            p[0] = ('=', p[1], p[3])

    def p_initializer(self, p):
        """
        initializer : assignment_expression
                    | LBRACE initializer_list RBRACE
                    | LBRACE initializer_list COMMA RBRACE
        """
        if len(p) == 2:
            p[0] = p[1]
        else:
            # Braces (and an optional trailing comma) are dropped.
            p[0] = p[2]

    def p_initializer_list(self, p):
        """
        initializer_list : initializer
                         | initializer_list COMMA initializer
        """
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[3]]

    def p_statement_list(self, p):
        """
        statement_list : statement
                       | statement_list statement
        """
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_statement_list_opt(self, p):
        """
        statement_list_opt : empty
                           | statement_list
        """
        # Propagate the list; without this the statements were discarded.
        p[0] = p[1]

    def p_compound_statement(self, p):
        """
        compound_statement : LBRACE declaration_list_opt statement_list_opt RBRACE
        """
        p[0] = (p[2], p[3])

    def p_statement(self, p):
        """
        statement : expression_statement
                  | compound_statement
                  | selection_statement
                  | iteration_statement
                  | jump_statement
                  | assert_statement
                  | print_statement
                  | read_statement
        """
        p[0] = p[1]

    def p_expression_opt(self, p):
        """
        expression_opt : expression
                       | empty
        """
        # Propagate the expression; without this it was discarded.
        p[0] = p[1]

    def p_expression_statement(self, p):
        """
        expression_statement : expression_opt SEMI
        """
        p[0] = p[1]

    def p_selection_statement(self, p):
        """
        selection_statement : IF LPAREN expression RPAREN statement
                            | IF LPAREN expression RPAREN statement ELSE statement
        """
        if len(p) == 6:
            p[0] = (p[1], p[3], p[5])
        else:
            p[0] = (p[1], p[3], p[5], p[6], p[7])

    def p_iteration_statement(self, p):
        """
        iteration_statement : WHILE LPAREN expression RPAREN statement
                            | FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN statement
        """
        if len(p) == 6:  # while
            p[0] = (p[1], p[3], p[5])
        else:  # for
            p[0] = (p[1], p[3], p[5], p[7], p[9])

    def p_jump_statement(self, p):
        """
        jump_statement : BREAK SEMI
                       | RETURN expression_opt SEMI
        """
        if len(p) == 3:
            p[0] = p[1]
        else:
            p[0] = (p[1], p[2])

    def p_assert_statement(self, p):
        """
        assert_statement : ASSERT expression SEMI
        """
        p[0] = ('assert', p[2])

    def p_expression_list(self, p):
        """
        expression_list : expression
                        | expression_list expression
        """
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_print_statement(self, p):
        """
        print_statement : PRINT LPAREN expression_list RPAREN SEMI
                        | PRINT LPAREN RPAREN SEMI
        """
        if len(p) == 6:
            p[0] = (p[1], p[3])
        else:
            p[0] = p[1]

    def p_declarator_list(self, p):
        """
        declarator_list : declarator
                        | declarator_list declarator
        """
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = p[1] + [p[2]]

    def p_read_statement(self, p):
        """
        read_statement : READ LPAREN declarator_list RPAREN SEMI
        """
        p[0] = (p[1], p[3])

    def p_error(self, p):
        """Print a syntax error; ``p`` is None when the input ended early."""
        if p:
            print("Error near the symbol %s" % p.value)
        else:
            print("Error at the end of input")


In [48]:
# Instantiate the parser (this generates the parsing tables).
# NOTE(review): the SyntaxError captured below is reported from parsetab.py,
# presumably a stale or truncated generated table file next to the notebook -
# confirm and delete it before re-running.
p = Parser()
#p.parse('a = 3 * 4 + 5')

SyntaxError: unexpected EOF while parsing (parsetab.py, line 29)