In [1]:
import Lexer.Lexer as lx
import re

In [8]:
default_depth = 500
max_depth = default_depth

class SyntaxBuilder:
    def __init__(self,path_grammar, init_symbol = 'S'):
        self.path_grammar = path_grammar
        
        self.init_symbol = init_symbol
        self.grammar = {}
        self.non_terminals = set()
        
        self.first = {}
        
        self.following = {}
        self.explored = set() #set to keep state of following
        
        self.predictions = {}
        self.getProd = []
        self.getId = {}
        
    def loadGrammar(self):
        f = open(self.path_grammar)
        lines = f.readlines()
        f.close()
        id_ = 0
        for line in lines:
            line = line.strip().split()
            if line[0] not in self.grammar:
                self.non_terminals.add(line[0])
                self.first[line[0]] = set()
                self.following[line[0]] = set()
                self.predictions[line[0]] = {}
                self.grammar[line[0]] = []
            
            self.getProd.append(line[1:])
            self.getId[str(line[1:])] = id_
            self.predictions[line[0]][id_] = set()
            self.grammar[line[0]].append(line[1:])
            id_+=1
        self.following[self.init_symbol] = {'$'} # Add to first symbol
        
    def primeros(self, v, precalc = False):
        global max_depth
        max_depth-=1
        
        if max_depth <=0 or len(v)==0:
            max_depth+=1
            return {'e'}
        
        if len(v) == 1 and v[0]=='e':
            max_depth+=1
            return {'e'}
        
        if v[0] not in self.non_terminals:
            max_depth+=1
            return {v[0]}
        
        if len(v) == 1 and v[0] in self.non_terminals:
            if precalc: return self.first[v[0]] # Used when we have already calculated it for non-terminals
            
            productions = self.grammar[v[0]] 
            first = set()
            for p in productions:
                first |= self.primeros(p)
            max_depth+=1
            self.first[v[0]] |= first
            return first
        
        first = self.primeros([v[0]])
        
        if 'e' in first:
            if len(v)>1:
                first.discard('e')
                first |= self.primeros(v[1:])
        max_depth+=1
        return first
    
    
    def siguiente(self, non_terminal): # S is the non-terminal
        global max_depth, default_depth
        
        self.explored.add(non_terminal)
        
        for production in self.grammar[non_terminal]:
            for i in range(len(production)):
                p = production[i]
                if p in self.non_terminals:
                    if p not in self.explored:
                        self.siguiente(p)
                    
                    max_depth = default_depth
                    first = self.primeros(production[i+1:])
                    
                    self.following[p] |= first - {'e'}
                    if 'e' in first:
                        self.following[p].add(non_terminal)
    def predict(self, S, prod):
        first = self.primeros(prod, True)
        if 'e' in first:
            first.discard('e')
            return first | self.following[S]
        else:
            return first
        
    def calcFirsts(self):
        global max_depth, default_depth
        for S in self.non_terminals:
            max_depth = default_depth
            self.primeros([S])
    
    def calcFollowing(self):
        
        for non_terminal in self.non_terminals:
            if non_terminal not in self.explored:
                self.siguiente(self.init_symbol)
        
        added = True #Placeholder, does nothing
        while added:
            added = False
            for non_terminal in self.non_terminals:
                current = self.following[non_terminal].copy()
                for element in self.following[non_terminal]:
                    if element in self.non_terminals:
                        to_add = self.following[element]
                        added = True
                        current |= to_add
                        current -= {non_terminal}
                        current -= {element}
                self.following[non_terminal] = current
    
    def calcPredictions(self):
        for k,productions in self.grammar.items():
            for production in productions:
                self.predictions[k][self.getId[str(production)]] = self.predict(k,production)
    
    def calculateAll(self):
        self.calcFirsts()
        self.calcFollowing()
        self.calcPredictions()

In [9]:
grammar_path = 'grammar.txt'
file_path = 'input.txt'
token_path = 'Lexer/tokens.txt'
reserved_path = 'Lexer/reserved.txt'

# initialize grammar and grammar sets
grammar = SyntaxBuilder(grammar_path,'COMPONENT')
grammar.loadGrammar()
grammar.calculateAll()
print(grammar.predictions)
print(grammar.getProd)

{'COMPONENT': {0: {'global', 'resource'}}, 'SPEC_COMPONENT': {1: {'global', 'resource'}}, 'COMP_LABEL': {2: {'global', 'resource'}}, 'COMP_KWD': {3: {'global'}, 4: {'resource'}}, 'SPEC_STMT_LS': {5: {'tk_type'}}, "SPEC_STMT_LS'": {6: {'separate'}}, 'SPEC_STMT': {7: {'tk_type'}}, 'COMMON_STMT': {8: {'tk_type'}}, 'DECL': {9: {'tk_type'}}, 'TYPE_DECL': {10: {'tk_type'}}, 'TYPE': {11: {'tk_cor_izq'}}, 'SUBSCRIPTS': {12: {'tk_cor_izq'}}, 'BRACKETED_LIST': {13: {'tk_cor_izq'}}, 'BOUND_LP': {14: {'id'}}, 'BOUNDS': {15: {'id'}}, 'BOUND': {16: set(), 34: {'id'}}, 'EXPR': {17: {'id'}}, 'LITERAL': {18: {'int'}}, 'INVOCATION': {19: {'id'}}, 'PAREN_LIST': {20: {'tk_par_izq'}}, 'PAREN_ITEM_LS': {21: {'id'}}, 'EXPR_LP': {22: {'id'}}, 'CONSTRUCTOR': {23: {'tk_par_izq'}}, 'CONSTR_ITEM_LP': {24: {'id'}}, 'CONSTR_ITEM': {25: {'id'}}, 'BINARY_EXPR': {26: {'id'}}, 'PREFIX_EXPR': {27: {'tk_bit_wise'}}, 'BASIC_TYPE': {28: {'bool'}}, 'PAREN_EXPR': {29: {'tk_par_izq'}}, 'SUBSCRIPTS_OPT': {30: {'tk_cor_izq'}}, 

In [14]:
derivation = ["COMPONENT", "$"]
lexer = lx.Lexer(file_path)
token_lexeme = {}

# ------ UTIL ----------

def get_lexeme(type_):
    global token_lexeme
    
    if type_ in token_lexeme: return token_lexeme[type_] #if token is tk_???
    return type_ # if token is reserved word

def loadTkSymb():
    global token_to_symb,token_path
    f = open(token_path)
    token_array = [x.strip().split('\t') for x in f.readlines()]
    f.close
    token_lexeme = {k:v for v,k in token_array}

# --------- MAIN ------------

def mainExists(file_path):
    # Here we find main on file
    lexer = lx.Lexer(file_path)
    lexer.readFile()
    tk = lexer.nextToken()
    while (tk.lexeme != '$'):
        if tk.lexeme == 'resource': return True
        tk = lexer.nextTokent()
    return False

def getNewPrefix(non_terminal, token_type):
    global grammar
    predictions = grammar.predictions[non_terminal]
    print("<<<PREDICTIONS: ")
    allTk = set()
    for i, prediction in predictions.items():
        allTk |= prediction
        print("<<<<<<<<<<<<<",grammar.getProd[i],prediction)
        if token_type in prediction:
            return grammar.getProd[i]
    
    return list(allTk) # In case we cannot solve the prefix
        

def derivate():
    global derivation, lexer, grammar
    lexer = lx.Lexer(file_path)
    lexer.readFile()
    tk = lexer.nextToken()
    prefix = []
    while(len(derivation)):
        print("-------------")
        a = derivation[0]
        print(">>>>",derivation)
        print("<<<TK: ",tk.parse())
        if a in grammar.non_terminals: # Expand
            new_prefix = getNewPrefix(a ,tk.token_type)
            derivation = new_prefix + derivation[1:]
            prefix = new_prefix
            print('')
        
        elif a == tk.token_type: # Match
            tk = lexer.nextToken()
            derivation = derivation[1:]
        else:
            print("-----",derivation)
            print("-----TK: ",tk.parse())
            return tk, prefix # It means we have unsatisfied expected values
    return tk, [] # it means it finished correctly

def execute():
    global derivation
    derivation = ['COMPONENT','$']
    
    tk, answer = derivate()
    
    if len(answer): # We have not found a proper derivation
        answer = str(answer).strip('[]')
        print('<{},{}> Error sintactico: se encontro>: "{}"; se esperaba: {}.'.format(tk.row, tk.col, get_lexeme(tk.lexeme), answer))
        return
    
    # we finished the code processing
    print('El analisis sintactico ha finalizado exitosamente.')


def main():
    global file_path, lexer
    loadTkSymb()
    if not mainExists(file_path):
        print('Error sintactico: falta funcion_principal')
        return
    
    execute()

In [15]:
main()

-------------
>>>> ['COMPONENT', '$']
<<<TK:  <resource,3,1>
<<<PREDICTIONS: 
<<<<<<<<<<<<< ['SPEC_COMPONENT', 'tk_punto_y_coma', 'COMBINED_COMPONENT', 'tk_punto_y_coma', 'SEPARATE_BODY', 'tk_punto_y_coma', 'e'] {'global', 'resource'}
-------------
>>>> ['SPEC_COMPONENT', 'tk_punto_y_coma', 'COMBINED_COMPONENT', 'tk_punto_y_coma', 'SEPARATE_BODY', 'tk_punto_y_coma', 'e', '$']
<<<TK:  <resource,3,1>
<<<PREDICTIONS: 
<<<<<<<<<<<<< ['COMP_LABEL', 'SPEC_STMT_LS', 'SPEC_BODY'] {'global', 'resource'}
-------------
>>>> ['COMP_LABEL', 'SPEC_STMT_LS', 'SPEC_BODY', 'tk_punto_y_coma', 'COMBINED_COMPONENT', 'tk_punto_y_coma', 'SEPARATE_BODY', 'tk_punto_y_coma', 'e', '$']
<<<TK:  <resource,3,1>
<<<PREDICTIONS: 
<<<<<<<<<<<<< ['COMP_KWD', 'id'] {'global', 'resource'}
-------------
>>>> ['COMP_KWD', 'id', 'SPEC_STMT_LS', 'SPEC_BODY', 'tk_punto_y_coma', 'COMBINED_COMPONENT', 'tk_punto_y_coma', 'SEPARATE_BODY', 'tk_punto_y_coma', 'e', '$']
<<<TK:  <resource,3,1>
<<<PREDICTIONS: 
<<<<<<<<<<<<< ['global

In [6]:
grammar.non_terminals

{'BASIC_TYPE',
 'BEGIN_END',
 'BINARY_EXPR',
 'BLOCK',
 'BLOCK_ITEM',
 'BLOCK_ITEMS',
 'BODY_ONLY',
 'BODY_STMT',
 'BODY_STMT_LS',
 'BOUND',
 'BOUNDS',
 'BOUND_LP',
 'BRACKETED_LIST',
 'CAPABILITY_DEF',
 'CAP_FOR',
 'COLON_OPT',
 'COMBINED_COMPONENT',
 'COMBINED_SPECPART',
 'COMMON_STMT',
 'COMPONENT',
 'COMP_KWD',
 'COMP_LABEL',
 'COMP_PARAMS',
 'CONCURRENT_CMD',
 'CONCURRENT_CMD_LP',
 'CONCURRENT_INVOCATION',
 'CONCURRENT_STMT',
 'CONSTRUCTOR',
 'CONSTR_ITEM',
 'CONSTR_ITEM_LP',
 'CREATE_CALL',
 'CREATE_EXPR',
 'DECL',
 'DESTROY_STMT',
 'DIRECTION',
 'DO_STMT',
 'ELSE_CMD_OPT',
 'END_ID',
 'ENUM_DEF',
 'EQ_OPT',
 'EXIT_CODE_OPT',
 'EXIT_STMT',
 'EXPLICIT_CALL',
 'EXPR',
 'EXPR_LP',
 'EXTEND_CLAUSE',
 'FIELD',
 'FIELD_LP',
 'FINAL_BLOCK',
 'FINAL_OPT',
 'FORWARD_STMT',
 'FOR_ALL_STMT',
 'GUARDED_CMD',
 'GUARDED_CMD_LP',
 'ID_LP',
 'ID_LS',
 'ID_OPT',
 'ID_SUBS',
 'ID_SUBS_LP',
 'IF_STMT',
 'IMPORT_CLAUSE',
 'IMPORT_LIST',
 'IMPORT_NAME',
 'INITIAL_BLOCK',
 'INITIAL_OPT',
 'INPUT_STMT'

In [7]:
b = list({1,2,3,4})
str(b).strip('[]')

'1, 2, 3, 4'