In [7]:
import re

# Lexer table: token name -> regular expression.
# Insertion order is significant: tokenize() tries the patterns from top to
# bottom at each offset and emits the first one that matches, so anything that
# also looks like an identifier (reserved words, SIM/NAO) must precede 'ID'.
REGEX = {
    # Reserved words. REP / ENQ / DUVIDA each carry their own token name
    # instead of sharing a single FUNCS_COND token.
    'TYPES_DEFIN': r'\b(NUM|VIRNUM|PAL|SIMNAO)\b',
    'REP': r'\b(REP)\b',
    'ENQ': r'\b(ENQ)\b',
    'DUVIDA': r'\b(DUVIDA)\b',
    'IMPRIMIR': r'\b(IMPRIMIR)\b',
    'FUNCS_CAST': r'\b(PARAPAL|PARANUM|PARAVIRNUM|PARASIMNAO)\b',

    # Operators. Two-character comparison operators come first inside the
    # alternation so that '>=' is not split into '>' and '='.
    'OPER_COND': r'(\>\=|\<\=|\!\=|\=|\<|\>)',
    'OPER_ARIT': r'(\+|\-|\*|\/)',
    'OPER_LOGIC': r'(\~|\^|\ˆ)',
    'OPER_DEFIN': r'¬\s*\bISTOEH\b',

    # Boolean literals. These used to be listed after 'ID', which made them
    # unreachable: 'SIM' and 'NAO' also satisfy the identifier pattern and
    # were therefore always emitted as 'ID'.
    'SIMNAO': r'(\bSIM\b|\bNAO\b)',

    'ID': r'\b[a-zA-Z_][a-zA-Z0-9_]*\b',

    # Numeric and string literals. 'VIRNUM' (digits, comma, digits) has to be
    # tried before 'NUM', otherwise only the integer part would be consumed.
    'VIRNUM': r'[0-9][0-9]*,[0-9][0-9]*',
    'NUM': r'[0-9][0-9]*',
    'PAL': r'(\'[^\']*\'|\"[^\"]*\")',

    # Punctuation: the token name is the character itself.
    ',': r'\,',
    ';': r'\;',
    ':': r'\:',
    '(': r'\(',
    ')': r'\)',
    '«': r'\«',
    '»': r'\»'
}

def tokenize(code, strict=False):
    """Split source text into a list of token dicts using the REGEX table.

    Every run of whitespace is first collapsed into a single space, so each
    reported position is an offset into that normalized text rather than into
    the string that was passed in. Note that this normalization is applied to
    the whole text, including the inside of quoted PAL literals.

    At each offset the REGEX patterns are tried in insertion order and the
    first one that matches is emitted.

    Args:
        code: Source text to tokenize.
        strict: When False (the default) a character that no pattern matches
            is skipped silently. When True, such a character raises
            ValueError unless it is a space.

    Returns:
        A list of dicts with the keys 'token', 'value' and 'position'. The
        list always ends with the end marker whose token and value are '$'
        and whose position is the normalized length plus one.

    Raises:
        ValueError: Only when ``strict`` is True and a non-space character
            matches no pattern.
    """
    # r"\s+" already covers newlines and tabs, so a single substitution does
    # the whole normalization.
    code = re.sub(r"\s+", " ", code)

    tokens = []
    position = 0
    while position < len(code):
        # Slice once per offset instead of once per pattern. Matching against
        # the slice (rather than passing an offset to match) keeps \b treating
        # the current offset as the start of the string.
        remaining = code[position:]

        for token_name, pattern in REGEX.items():
            match = re.match(pattern, remaining)
            if match:
                value = match.group(0)
                tokens.append({'token': token_name, 'value': value, 'position': position})
                position += len(value)
                break
        else:
            # Nothing matched at this offset. Spaces are separators and are
            # always skipped; anything else is a lexical error that is only
            # reported when the caller asked for strict mode.
            if strict and code[position] != ' ':
                raise ValueError(
                    "Unrecognized character %r at position %d" % (code[position], position)
                )
            position += 1

    tokens.append({'token': '$', 'value': '$', 'position': len(code)+1})
    return tokens



# SYNTAX ANALYSIS

In [8]:
syntax_table = {
    'MAIN': {
        'ID': ['ID', 'DECL', 'MAIN'],
        'ENQ': ['FUNCS_COND', 'MAIN'],
        'REP': ['FUNCS_COND', 'MAIN'],
        'DUVIDA': ['FUNCS_COND', 'MAIN'],
        'PARANUM': ['FUNCS_CAST', 'MAIN'],
        'PARAVIRNUM': ['FUNCS_CAST', 'MAIN'],
        'PARASIMNAO': ['FUNCS_CAST', 'MAIN'],
        'PARANUM': ['FUNCS_CAST', 'MAIN'],
        'IMPRIMIR': ['IMPRIMIR', '(', 'VALUE', 'MULTIPLE_PARAM', ')', ';', 'MAIN'],
        'ε': [],
    },
    'DECL': {
        'OPER_DEFIN': ['OPER_DEFIN', 'TYPES_DEFIN', 'ATRIB', ';'],
        ':': ['ATRIB', ';'],
    },
    'ATRIB': {
        ':': [':', 'VALUE']
    },
    'FUNCS_COND': {
        'DUVIDA': ['DUVIDA', '(', 'COND', ')', '«', 'MAIN', '»'],
        'REP': ['REP', '(', 'COND', ')', '«', 'MAIN', '»'],
        'ENQ': ['ENQ', '(', 'COND', ')', '«', 'MAIN', '»']
    },

    'COND': {
        'ID': ['VALUE', 'OPER_COND', 'VALUE', 'MULTIPLE_COND'],
        'NUM': ['VALUE', 'OPER_COND', 'VALUE', 'MULTIPLE_COND'],
        'VIRNUM': ['VALUE', 'OPER_COND', 'VALUE', 'MULTIPLE_COND'],
        'PAL': ['VALUE', 'OPER_COND', 'VALUE', 'MULTIPLE_COND'],
        'SIMNAO': ['VALUE', 'OPER_COND', 'VALUE', 'MULTIPLE_COND'],
        'ε': []
    },
    'VALUE': {
        'ID': ['ID', 'MULTIPLE_ARIT'],
        'NUM': ['NUM', 'MULTIPLE_ARIT'],
        'VIRNUM': ['VIRNUM', 'MULTIPLE_ARIT'],
        'PAL': ['PAL', 'MULTIPLE_ARIT'],
        'SIMNAO': ['SIMNAO', 'MULTIPLE_ARIT'],
        'FUNCS_CAST':['FUNCS_CAST', '(', 'VALUE', ')'],
        '(': ['FUNC']
    },
    'FUNC': {
        '(': ['(', 'VALUE', 'MULTIPLE_PARAM', ')', '«', 'MAIN', '»']
    },
    'OPER_COND': {
        '>': ['>'],
        '<': ['<'],
        '>=': ['>='],
        '<=': ['<='],
        '=': ['='],
    },
    'OPER_LOGIC': {
        'ˆ': ['ˆ'],
        '^': ['^'],
        '~': ['~'],
        'ε': []
    },
    'OPER_ARIT': {
        '+': ['+'],
        '-': ['-'],
        '*': ['*'],
        '/': ['/'],
        'ε': []
    },
    'MULTIPLE_ARIT': {
        'OPER_ARIT': ['OPER_ARIT', 'VALUE', 'MULTIPLE_ARIT'],
        'ε': []
    },
    'MULTIPLE_COND': {
        'OPER_LOGIC': ['OPER_LOGIC', 'COND', 'MULTIPLE_COND'], # PARA CONDICOES MULTIPLAS
        'ε': []
    },
    'MULTIPLE_PARAM': {
        ',': [',', 'VALUE', 'MULTIPLE_PARAM'], #PARA FUNCOES COM MULTIPLOS PARAMETROS
        'ε': []
    }
}


def syntax_analyzer(token_list):
    """Check a token list against syntax_table with a stack-driven parser.

    The stack starts as ['$', 'MAIN']. On every step the top of the stack is
    compared with the current token:

    * equal to the token name: the symbol is popped and the token consumed;
    * a key of syntax_table (a non-terminal): it is replaced by the production
      selected by the token name, or simply popped when no production matches
      and the non-terminal has an 'ε' entry;
    * anything else: the input is rejected.

    A trace of every step is printed.

    Args:
        token_list: List of dicts with the keys 'token', 'value' and
            'position', as produced by tokenize(). It is expected to end with
            the '$' end marker.

    Returns:
        True when the stack was emptied, False on any syntax error, including
        a token list that runs out before the stack is empty.
    """
    stack = ['$', 'MAIN']  # Initialize the stack with the start symbol
    index = 0

    # Keep going only while there is both a symbol to expand and a token to
    # look at. The previous condition (`len(stack) > 0 or len(token_list) <
    # index`) let an empty or unterminated token list run into an IndexError
    # on token_list[index] and made the final failure branch unreachable.
    while stack and index < len(token_list):
        current_symbol = stack[-1]
        current_token = token_list[index]
        print('Pilha:', stack, "\nSimbolo atual:", current_symbol,"\nSimbolo recebido: ", current_token['token'], "Valor: ", current_token['value'], "Posicao:", current_token['position'], "\n" )

        if current_symbol == current_token['token']:
            # Terminal on top of the stack matches the lookahead: consume it.
            print("Encontrou o simbolo!")
            stack.pop()
            index += 1

        elif current_symbol in syntax_table:
            if current_token['token'] in syntax_table[current_symbol]:
                production = syntax_table[current_symbol][current_token['token']]
                stack.pop()
                # Push right-to-left so the leftmost symbol ends up on top.
                for symbol in reversed(production):
                    print("Adicionada a Pilha:", stack)
                    stack.append(symbol)
            else:
                if 'ε' in syntax_table[current_symbol]:
                    # No production for this lookahead: derive the empty string.
                    print("Caiu em Epson, recuperação\n")
                    stack.pop()
                else:
                    print("Error: Unexpected token", current_token['value'], "Position: ", current_token['position'])
                    return False
        else:
            # A terminal that does not match the lookahead.
            print("Error: Invalid symbol in the stack", current_symbol)
            return False

    if len(stack) == 0:
        print("\n\nEste código é valido!")
        return True
    else:
        # Tokens ran out while symbols were still expected.
        print("Error: Syntax analysis failed.")
        return False



In [9]:

# Demo driver: tokenizes the embedded sample program and runs the syntax
# analyzer on the resulting tokens. The analyzer's return value is not used;
# the result is only visible through the trace it prints.
if __name__ == '__main__':
    # Path of a sample program file. Only the commented-out loader below
    # refers to it, so it is currently unused.
    file = './dml_language.disney'
    # with open(file, 'rb') as f:
    #     code = f.read().decode('utf-8')

    # Inline sample program used in place of the file.
    # NOTE(review): in the IMPRIMIR line the string "val1' is opened with a
    # double quote and closed with a single quote, so neither quote has a
    # partner for the PAL pattern; tokenize() skips characters that match no
    # pattern, so this does not surface as an error — confirm whether the
    # mismatch is intentional.
    code = '''
        val1 ¬ ISTOEH NUM : 100;

        qtd ¬ ISTOEH NUM: 12;
        qtd2 ¬ ISTOEH NUM: 14;
        qtd3 ¬ ISTOEH VIRNUM: 12,5;
        qtd4 ¬ ISTOEH NUM: 12;
        qtd5 ¬ ISTOEH NUM: 12;
        qtd6 ¬ ISTOEH NUM: "12";

        ENQ (qtd >= 0) «

            func: (qtd1) «
                qtd: "Errou";
            »;

            DUVIDA ( val1 < 0 ^ val1 / qtd+1 > 0) «
                val1: val1 * qtd * 1;
                qtd: qtd - 1;
            »
            DUVIDA ( val1 = 0 + PARANUM(5) ) «
                qtd: "Errou";
            »
            DUVIDA (val1 > 0 ) «
                val1: PARAVIRNUM(val1/qtd);
                qtd: qtd - 1;
            »

            IMPRIMIR("QTD", qtd, "val1', val1);
        »
        
    '''

    tokens = tokenize(code)
    syntax_analyzer(tokens)
    # Optional dump of the raw token stream, kept for debugging:
    # for token in tokens:
    #     print(f"Token: {token['token']}, Value: {token['value']}, Position: {token['position']}")

Pilha: ['$', 'MAIN'] 
Simbolo atual: MAIN 
Simbolo recebido:  ID Valor:  val1 Posicao: 1 

Adicionada a Pilha: ['$']
Adicionada a Pilha: ['$', 'MAIN']
Adicionada a Pilha: ['$', 'MAIN', 'DECL']
Pilha: ['$', 'MAIN', 'DECL', 'ID'] 
Simbolo atual: ID 
Simbolo recebido:  ID Valor:  val1 Posicao: 1 

Encontrou o simbolo!
Pilha: ['$', 'MAIN', 'DECL'] 
Simbolo atual: DECL 
Simbolo recebido:  OPER_DEFIN Valor:  ¬ ISTOEH Posicao: 6 

Adicionada a Pilha: ['$', 'MAIN']
Adicionada a Pilha: ['$', 'MAIN', ';']
Adicionada a Pilha: ['$', 'MAIN', ';', 'ATRIB']
Adicionada a Pilha: ['$', 'MAIN', ';', 'ATRIB', 'TYPES_DEFIN']
Pilha: ['$', 'MAIN', ';', 'ATRIB', 'TYPES_DEFIN', 'OPER_DEFIN'] 
Simbolo atual: OPER_DEFIN 
Simbolo recebido:  OPER_DEFIN Valor:  ¬ ISTOEH Posicao: 6 

Encontrou o simbolo!
Pilha: ['$', 'MAIN', ';', 'ATRIB', 'TYPES_DEFIN'] 
Simbolo atual: TYPES_DEFIN 
Simbolo recebido:  TYPES_DEFIN Valor:  NUM Posicao: 15 

Encontrou o simbolo!
Pilha: ['$', 'MAIN', ';', 'ATRIB'] 
Simbolo atual: ATRIB 
