<a href="https://colab.research.google.com/github/gabrielmattia/OPyTaOn/blob/main/analizador_sintatico.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
!pip install rply

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [76]:
import warnings
from rply import ParserGenerator, LexerGenerator, ParsingError
from rply.lexer import LexingError
from rply.token import BaseBox

# cria um objeto LexerGenerator
lg = LexerGenerator()

# insere as regras de formação (expressões regulares) dos tokens da linguagem

#PREDEFINED IDENTIFIERS
lg.add('VOID', r'void')


lg.add('BOOLEAN', r'boolean')
lg.add('TRUE', r'true')
lg.add('FALSE', r'false')
lg.add('READ', r'read')
lg.add('INTEGER', r'integer')
lg.add('WRITE', r'write')
lg.add('LABELS', r'labels')
lg.add('TYPES', r'types')
lg.add('VARS', r'vars')
lg.add('VAR', r'var')
lg.add('GOTO', r'goto')
lg.add('RETURN', r'return')
lg.add('IF', r'if')
lg.add('ELSE', r'else')
lg.add('WHILE', r'while')


lg.add('FUNCTIONS', r'functions')

lg.add('EQUAL', r'\==')
lg.add('ATTR', r'\=')

lg.add('NOTEQUAL', r'!=')
lg.add('NOT', r'\!')

lg.add('GREATEREQUALTHAN', r'>=')
lg.add('GREATERTHAN', r'>')

lg.add('LESSEREQUALTHAN', r'<=')
lg.add('LESSERTHAN', r'<')

lg.add('OR', r'\|\|')
lg.add('AND', r'\&\&')


lg.add('NUMBER', r'\d+')
lg.add('PLUS', r'\+')
lg.add('MINUS', r'-')
lg.add('MULT', r'\*')
lg.add('DIV', r'/')
lg.add('LPAREN', r'\(')
lg.add('RPAREN', r'\)')
lg.add('LCHAVE', r'\{')
lg.add('RCHAVE', r'\}')
lg.add('COMMA', r'\,')
lg.add('SEMICOLON', r'\;')
lg.add('COMMA', r'\,')
lg.add('COLON', r'\:')
lg.add('LBRACKET', r'\[')
lg.add('RBRACKET', r'\]')
lg.add('COLON', r'\:')



lg.add('ID', r'[a-z][a-z0-9]*')
lg.add('DIGIT', r'[0-9]*')



lg.add('MOD', r'%')

# cria uma regra para ignorar caracteres de espaços
lg.ignore('\s+')
lg.ignore('\#.+')

#####################################################
pg = ParserGenerator(
        ['VOID', 'ID', 'LPAREN', 'RPAREN', 'LCHAVE', 'RCHAVE', 'VAR','COLON', 'COMMA', 'ATTR',
         'MULT', 'DIV', 'AND', 'PLUS', 'MINUS', 'OR','INTEGER','DIGIT','NOT','COLON','VAR',
         'COMMA','SEMICOLON', 'GREATEREQUALTHAN', 'LESSEREQUALTHAN','LESSERTHAN','GREATERTHAN','EQUAL','NOTEQUAL',
         'FUNCTIONS','LABELS','VARS','RETURN','IF','ELSE','WHILE'],
     )


@pg.production('program : function')
def program(p):
    return p[0].getstr()

@pg.production('function : VOID ID formal_parameters block')
@pg.production('function : ID ID formal_parameters block')
def function(p):
    
    return p[0]

@pg.production('formal_parameters : LPAREN formal_parameters2')
def formal_parameters(p):
    return p[0]

@pg.production('formal_parameters1 : SEMICOLON formal_parameter formal_parameters1')
def formal_parameters1(p):

    return p[0]

@pg.production('formal_parameters2 : expression_parameter formal_parameters1 RPAREN')
@pg.production('formal_parameters2 : formal_parameters1 RPAREN')
@pg.production('formal_parameters2 : RPAREN')
def formal_parameters2(p):
    return p[0]

@pg.production('formal_parameter : expression_parameter')
def formal_parameter(p):
    return p[0]

@pg.production('expression_parameter : identifier_list COLON ID')
@pg.production('expression_parameter : VAR identifier_list COLON ID')
def expression_parameter(p):
   
    return p[0]

@pg.production('identifier_list : ID identifier_list1')
def identifier_list(p):
    return p[0]

@pg.production('identifier_list1 : COMMA ID identifier_list1')
def identifier_list1(p):
    return p[0]


@pg.production('block : variables block3')
@pg.production('block : LABELS block2')
@pg.production('block : FUNCTIONS body')
@pg.production('block : body')
def block(p):
    return p[0]

@pg.production('block1 :  FUNCTIONS body')
@pg.production('block1 :  body')
def block1(p):
    return p[0]

@pg.production('block2 :   FUNCTIONS body')
@pg.production('block2 :   body')
@pg.production('block2 :   variables block1')
def block2(p):
    return p[0]

@pg.production('block3 :    FUNCTIONS body')
@pg.production('block3 :     body')
def block3(p):
    return p[0]


@pg.production('variables : VARS identifier_list COLON type SEMICOLON variables1')
def variables(p):
    return p[0]

@pg.production('variables1 : identifier_list COLON type SEMICOLON variables1')
def variables1(p):
    return p[0]

@pg.production('type : ID')
def type(p):
    return p[0]

@pg.production('body : LCHAVE statement RCHAVE')
def body(p):
    return p[0]

@pg.production('statement : unlabeled_statement')
@pg.production('statement : compound')
@pg.production('statement : ID COLON unlabeled_statement')
@pg.production('statement : ID COLON unlabeled_statement')
def statement(p):
    return p[0]

@pg.production('unlabeled_statement : function_call_statement')
@pg.production('unlabeled_statement : empty_statement')
@pg.production('unlabeled_statement : retorno')
@pg.production('unlabeled_statement : conditional')
@pg.production('unlabeled_statement : repetitive')
def unlabeled_statement(p):
    return p[0]

@pg.production('function_call_statement : function_call SEMICOLON')
def function_call_statement(p):
    return p[0]

@pg.production('function_call : ID LPAREN expression_list RPAREN')
def function_call(p):
    return p[0]

@pg.production('assignment :  variable ATTR expression SEMICOLON')
def assignment(p):
    return p[0]

@pg.production('variable :  ID')
def variable(p):
    return p[0]

@pg.production('expression :  simple_expression expression1')
def expression(p):
    return p[0]

@pg.production('expression1 :  relational_operator simple_expression')
def expression1(p):
    return p[0]

@pg.production('simple_expression :  term simple_expression1')
@pg.production('simple_expression :  PLUS term simple_expression1')
@pg.production('simple_expression :  MINUS term simple_expression1')
def simple_expression(p):
    return p[0]

@pg.production('simple_expression1 : additive_operator term simple_expression1')
def simple_expression1(p):
    return p[0]

@pg.production('term : factor term1')
def term(p):
    return p[0]

@pg.production('term1 : multiplicative_operator factor term1')
def term1(p):
    return p[0]

@pg.production('factor : function_call')
@pg.production('factor :  LPAREN expression RPAREN')
@pg.production('factor :  NOT factor')
@pg.production('factor :  INTEGER')
def factor(p):
    return p[0]

@pg.production('multiplicative_operator :  MULT')
@pg.production('multiplicative_operator :  DIV')
@pg.production('multiplicative_operator :  AND')
def multiplicative_operator(p):
    return p[0]

@pg.production('relational_operator :  EQUAL')
@pg.production('relational_operator :  NOTEQUAL')
@pg.production('relational_operator :  LESSERTHAN')
@pg.production('relational_operator :  GREATERTHAN')
@pg.production('relational_operator :  LESSEREQUALTHAN')
@pg.production('relational_operator :  GREATEREQUALTHAN')
@pg.production('relational_operator :  AND')
def relational_operator(p):
    return p[0]

@pg.production('additive_operator :  PLUS')
@pg.production('additive_operator :  MINUS')
@pg.production('additive_operator :  OR')
def additive_operator(p):
    return p[0]

@pg.production('retorno : RETURN retorno1')
def retorno(p):
    return p[0]

@pg.production('retorno1 :  expression SEMICOLON')
@pg.production('retorno1 :  SEMICOLON')
def retorno1(p):
    return p[0]

@pg.production('conditional : IF LPAREN expression RPAREN compound conditional1')
def conditional(p):
    return p[0]

@pg.production('conditional1 : ELSE compound')
def conditional1(p):
    return p[0]

@pg.production('repetitive : WHILE LPAREN expression RPAREN compound')
def repetitive(p):
    return p[0]

@pg.production('empty_statement : SEMICOLON')
def empty_statement(p):
    return p[0]

@pg.production('compound : LCHAVE unlabeled_statement compound1 RCHAVE')
def compound(p):
    return p[0]
@pg.production('compound1 : unlabeled_statement compound1')
def compound1(p):
    return p[0]

@pg.production('expression_list : expression expression_list1')
@pg.production('expression_list : empty')
def expression_list(p):
    return p[0]
@pg.production('expression_list1 : COMMA expression expression_list1')
def expression_list1(p):
    return p[0]

@pg.production('empty : ')
def empty(p):
    return p[0]













def robo_run(source_code):
    l = lg.build()
    try:
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            for expr in pg.build().parse(l.lex(source_code)):
                print( expr)
    # except ParsingError as e:
        # print ("ParsingError: lineno={} colno={}".format(
        #     e.getsourcepos().lineno, e.getsourcepos().colno))
    except LexingError as e:
        print( "LexingError: lineno={} colno={}".format(
            e.getsourcepos().lineno, e.getsourcepos().colno))


if __name__ == '__main__':
    source_code = """
void id ( id : id ) {
      return ; 
   }
    """
    robo_run(source_code)

ParsingError: ignored

In [None]:
source_code = """
        
       void main ( ) 
        var m, n s : integer;
       {
         while ( m <=  n){
           s = s + m * m;
           write(m,s);
           m = m+1
         }
       }
        
    """

In [80]:
import warnings
from rply import ParserGenerator, LexerGenerator, ParsingError
from rply.lexer import LexingError
from rply.token import BaseBox

lg = LexerGenerator()

lg.add('NUMBER', r'\d+')
lg.add('WALK', r'andar')
lg.add('TURN', r'girar')
lg.add('STEP', r'passo(s)?')
lg.add('DOT', r'\.')
lg.add('COLON', r':')
lg.add('LEFT', r'esquerda')
lg.add('RIGHT', r'direita')
lg.add('DEGREE', r'grau(s)?')
lg.add('CATCH', r'pegar')
lg.add('DROP', r'soltar')
lg.add('DO', r'faça')
lg.add('END', r'fim')
lg.add('TIMES', r'vez(es)?')
lg.ignore(r'\s+')


class Number(BaseBox):

    def __init__(self, value):
        self.value = value

    def eval(self):
        return self.value


class Walk(BaseBox):

    def __init__(self, value):
        self.value = value

    def eval(self):
        return ">>> Andei {} passos!".format(self.value.eval())


class Turn(BaseBox):

    def __init__(self, degree, direction):
        self.degree = degree
        self.direction = direction

    def eval(self):
        return ">>> Girei {} graus a {}".format(self.degree.eval(),
                                                self.direction)


class Catch(BaseBox):

    def eval(self):
        return ">>> Peguei!"


class Drop(BaseBox):

    def eval(self):
        return ">>> Soltei!"


class DoStmt(BaseBox):
    def __init__(self, times, blocks):
        self.times = times
        self.blocks = blocks

    def eval(self):
        out = ">>> Começando a fazer\n"
        for i in range(self.times.eval()):
            for block in self.blocks:
                out += "\t" + block.eval() + "\n"
        out += ">>> fim"
        return out


pg = ParserGenerator(
    ['NUMBER', 'WALK', 'TURN', 'STEP', 'DOT', 'RIGHT',
     'LEFT', 'DEGREE', 'CATCH', 'DROP', 'DO', 'COLON',
     'TIMES', 'END'],
)


@pg.production('program : block')
def program(p):
    return p[0]


@pg.production('block : stmts')
def block(p):
    return p[0]


@pg.production('stmts : stmts stmt')
def stmts_b(p):
    if p[1] is None:
        return p[0]
    else:
        return p[0] + [p[1]]


@pg.production('stmts : stmt')
def stmts_stmt(p):
    if p[0] is None:
        return []
    else:
        return [p[0]]


@pg.production('stmt : walk_expr')
@pg.production('stmt : turn_expr')
@pg.production('stmt : catch_expr')
@pg.production('stmt : drop_expr')
@pg.production('stmt : do_stmt')
def stmt(p):
    return p[0]


@pg.production('walk_expr : WALK number STEP DOT')
def expression_walk(p):
    return Walk(p[1])


@pg.production('number : NUMBER')
def expr_number(p):
    return Number(int(p[0].getstr()))


@pg.production('turn_expr : TURN number DEGREE direction DOT')
def expression_turn(p):
    return Turn(p[1], p[3])


@pg.production('turn_expr : TURN direction DOT')
def expression_simple_turn(p):
    return Turn(Number(90), p[1])


@pg.production('direction : RIGHT')
@pg.production('direction : LEFT')
def direction(p):
    return p[0].getstr()


@pg.production('catch_expr : CATCH DOT')
def expression_catch(p):
    return Catch()


@pg.production('drop_expr : DROP DOT')
def expression_drop(p):
    return Drop()


@pg.production('do_stmt : DO number TIMES COLON block END')
def expression_do(p):
    return DoStmt(p[1], p[4])


def robo_run(source_code):
    l = lg.build()
    try:
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            for expr in pg.build().parse(l.lex(source_code)):
                print (expr.eval())
    except ParsingError as e:
        print ("ParsingError: lineno={} colno={}".format(
            e.getsourcepos().lineno, e.getsourcepos().colno))
    except LexingError as e:
        print ("LexingError: lineno={} colno={}".format(
            e.getsourcepos().lineno, e.getsourcepos().colno))


if __name__ == '__main__':
    source_code = """
        girar 3 graus esquerda.
        andar 10 passos.
        girar direita.
        pegar.
        soltar.
        faça 2 vezes:
            pegar.
            andar 1 passo.
            soltar.
        fim
        girar 45 graus direita.
    """
    robo_run(source_code)

>>> Girei 3 graus a esquerda
>>> Andei 10 passos!
>>> Girei 90 graus a direita
>>> Peguei!
>>> Soltei!
>>> Começando a fazer
	>>> Peguei!
	>>> Andei 1 passos!
	>>> Soltei!
	>>> Peguei!
	>>> Andei 1 passos!
	>>> Soltei!
>>> fim
>>> Girei 45 graus a direita
