In [30]:
import Lexer.Lexer as lx
import re

In [31]:
default_depth = 500  # recursion budget granted to each top-level FIRST computation
max_depth = default_depth  # remaining budget; consumed while `primeros` recurses


class SyntaxBuilder:
    """Loads a context-free grammar and derives its FIRST, FOLLOW and prediction sets.

    Grammar file format: one production per line, whitespace separated, where
    the first word is the non-terminal being defined and the remaining words
    are the symbols of its body.  The symbol ``e`` stands for the empty string
    and ``$`` is used as the end-of-input marker.
    """

    def __init__(self, path_grammar, init_symbol='S'):
        """Store the grammar location and create the (still empty) tables.

        Args:
            path_grammar: path of the grammar file read by `loadGrammar`.
            init_symbol: start symbol of the grammar.
        """
        self.path_grammar = path_grammar

        self.init_symbol = init_symbol
        self.grammar = {}  # non-terminal -> list of production bodies
        self.non_terminals = set()

        self.first = {}  # non-terminal -> FIRST set

        self.following = {}  # non-terminal -> FOLLOW set
        self.explored = set()  # non-terminals already visited by `siguiente`

        self.predictions = {}  # non-terminal -> {production id -> prediction set}
        self.getProd = []  # production id -> production body
        self.getId = {}  # str(production body) -> id of the last production with that body

    def loadGrammar(self):
        """Read the grammar file and register every production it contains.

        Production ids are assigned in file order starting at 0.  Blank lines
        are ignored.  The FOLLOW set of the start symbol is seeded with ``$``.
        """
        # `with` guarantees the handle is released even if reading fails.
        with open(self.path_grammar) as f:
            lines = f.readlines()

        id_ = 0
        for line in lines:
            symbols = line.strip().split()
            if not symbols:
                # A blank line carries no production; indexing it would fail.
                continue
            head, body = symbols[0], symbols[1:]
            if head not in self.grammar:
                self.non_terminals.add(head)
                self.first[head] = set()
                self.following[head] = set()
                self.predictions[head] = {}
                self.grammar[head] = []

            self.getProd.append(body)
            self.getId[str(body)] = id_
            self.predictions[head][id_] = set()
            self.grammar[head].append(list(body))
            id_ += 1
        self.following[self.init_symbol] = {'$'}  # Add to first symbol

    def primeros(self, v, precalc=False):
        """Return the FIRST set of the symbol sequence ``v``.

        Args:
            v: list of grammar symbols.
            precalc: when True and ``v`` is a single non-terminal, answer from
                the already computed ``self.first`` table instead of recursing.

        Returns:
            A new set of terminals (plus ``'e'`` when ``v`` can derive the
            empty string).  The caller may mutate it freely.
        """
        global max_depth
        max_depth -= 1
        try:
            return self._primeros(v, precalc)
        finally:
            # Always give the budget back, whichever branch produced the answer.
            max_depth += 1

    def _primeros(self, v, precalc):
        """Body of `primeros`; the depth budget is handled by the caller."""
        # An exhausted budget is treated like the empty sequence.
        if max_depth <= 0 or len(v) == 0:
            return {'e'}

        if len(v) == 1 and v[0] == 'e':
            return {'e'}

        if v[0] not in self.non_terminals:
            return {v[0]}

        if len(v) == 1:
            if precalc:
                # Return a copy: callers such as `predict` strip 'e' from the
                # result and must not alter the stored FIRST set.
                return set(self.first[v[0]])

            first = set()
            for p in self.grammar[v[0]]:
                first |= self.primeros(p)
            self.first[v[0]] |= first
            return first

        first = self.primeros([v[0]])

        if 'e' in first:
            # The leading non-terminal may vanish, so the rest contributes too.
            first.discard('e')
            first |= self.primeros(v[1:])
        return first

    def siguiente(self, non_terminal):
        """Collect FOLLOW contributions from the productions of ``non_terminal``.

        For every non-terminal ``p`` in a body, the FIRST set of what follows
        it (minus ``'e'``) is added to ``following[p]``.  When the remainder
        can be empty, ``non_terminal`` itself is added as a placeholder that
        `calcFollowing` later replaces by its FOLLOW set.
        """
        global max_depth, default_depth

        self.explored.add(non_terminal)

        for production in self.grammar[non_terminal]:
            for i, p in enumerate(production):
                if p in self.non_terminals:
                    if p not in self.explored:
                        self.siguiente(p)

                    max_depth = default_depth
                    first = self.primeros(production[i + 1:])

                    self.following[p] |= first - {'e'}
                    if 'e' in first:
                        self.following[p].add(non_terminal)

    def predict(self, S, prod):
        """Return the prediction set of production ``S -> prod``.

        It is FIRST(prod) without ``'e'``, extended with FOLLOW(S) when
        ``prod`` can derive the empty string.
        """
        first = self.primeros(prod, True)
        if 'e' in first:
            return (first - {'e'}) | self.following[S]
        return first

    def calcFirsts(self):
        """Fill ``self.first`` for every non-terminal."""
        global max_depth, default_depth
        for S in self.non_terminals:
            max_depth = default_depth
            self.primeros([S])

    def calcFollowing(self):
        """Fill ``self.following`` for every non-terminal."""
        for non_terminal in self.non_terminals:
            if non_terminal not in self.explored:
                # Explore the symbol that is actually missing, so symbols not
                # reachable from the start symbol are processed as well.
                self.siguiente(non_terminal)

        # Replace non-terminal placeholders by the FOLLOW set they stand for,
        # repeating until a full pass finds no placeholder.
        added = True
        while added:
            added = False
            for non_terminal in self.non_terminals:
                current = self.following[non_terminal].copy()
                for element in self.following[non_terminal]:
                    if element in self.non_terminals:
                        to_add = self.following[element]
                        added = True
                        current |= to_add
                        current -= {non_terminal}
                        current -= {element}
                self.following[non_terminal] = current

    def calcPredictions(self):
        """Fill ``self.predictions`` for every production."""
        for k, by_id in self.predictions.items():
            # Walk the ids registered for this non-terminal rather than going
            # through `getId`, whose key is the body only and therefore
            # collides when two non-terminals share a body (e.g. ``e``).
            for id_ in list(by_id):
                by_id[id_] = self.predict(k, self.getProd[id_])

    def calculateAll(self):
        """Compute FIRST, FOLLOW and prediction sets, in that order."""
        self.calcFirsts()
        self.calcFollowing()
        self.calcPredictions()

In [32]:
# Input locations: the grammar, the program to analyse and the lexer tables.
grammar_path, file_path = 'grammar.txt', 'input.txt'
token_path, reserved_path = 'Lexer/tokens.txt', 'Lexer/reserved.txt'

# Build the grammar tables (start symbol 'A') and show the prediction sets
# followed by the production bodies, indexed by production id.
grammar = SyntaxBuilder(grammar_path, init_symbol='A')
grammar.loadGrammar()
grammar.calculateAll()
print(grammar.predictions, grammar.getProd, sep='\n')

{'A': {0: {'cow', 'bus', 'cat', 'big'}, 1: {'ant'}}, 'B': {2: {'big'}, 3: {'bus'}, 4: {'cow', 'cat'}}, 'C': {5: {'cat'}, 6: {'cow'}}}
[['B', 'C'], ['ant', 'A', 'all'], ['big', 'C'], ['bus', 'A', 'boss'], ['e'], ['cat'], ['cow']]


In [45]:
# Token type -> lexeme table consulted by get_lexeme.
token_lexeme = {}
# Lexer over the input program.
lexer = lx.Lexer(file_path)
# Pending symbols of the derivation: start symbol, then the end marker.
derivation = ["A", "$"]

# ------ UTIL ----------

def get_lexeme(type_):
    """Return the lexeme registered for ``type_`` in ``token_lexeme``.

    When ``type_`` has no entry in the table (the original comments describe
    this as the reserved-word case) it is returned unchanged.
    """
    return token_lexeme.get(type_, type_)

def loadTkSymb(token_path):
    """Load the token table into the module-level ``token_lexeme`` dict.

    Each non-blank line of the file holds two tab-separated fields; the
    second field becomes the key and the first one the value.

    Args:
        token_path: path of the tab-separated token file.

    Raises:
        ValueError: if a non-blank line does not have exactly two fields.
    """
    # Declare the name that is actually assigned below; otherwise the table
    # would be a local variable discarded on return.
    global token_lexeme
    with open(token_path) as f:
        token_array = [x.strip().split('\t') for x in f if x.strip()]
    token_lexeme = {k: v for v, k in token_array}

# --------- MAIN ------------

def mainExists(file_path):
    """Tell whether the file contains a token whose lexeme is ``resource``.

    The file is scanned with a fresh lexer until a token with lexeme ``$``
    is reached.

    Args:
        file_path: path of the program to scan.

    Returns:
        True as soon as a ``resource`` token is found, False otherwise.
    """
    lexer = lx.Lexer(file_path)
    lexer.readFile()
    tk = lexer.nextToken()
    while tk.lexeme != '$':
        if tk.lexeme == 'resource':
            return True
        tk = lexer.nextToken()  # was misspelled `nextTokent`
    return False

def getNewPrefix(non_terminal, token_type):
    """Pick the production of ``non_terminal`` predicted by ``token_type``.

    Args:
        non_terminal: non-terminal to expand.
        token_type: type of the current input token.

    Returns:
        The body of the first production whose prediction set contains
        ``token_type``.  When none does, the sorted list of every token
        predicted for ``non_terminal`` (the tokens that were expected).
    """
    global grammar
    predictions = grammar.predictions[non_terminal]

    allTk = set()
    # `.items()` is required: iterating the dict alone yields only the ids.
    for i, prediction in predictions.items():
        allTk |= prediction
        if token_type in prediction:
            return grammar.getProd[i]

    # Sorted so the expected-token list is reported in a stable order.
    return sorted(allTk)  # In case we cannot solve the prefix
        

def derivate():
    """Run the predictive derivation of the input file.

    Starting from the module-level ``derivation`` list, the leftmost symbol
    is repeatedly expanded (non-terminal) or matched against the current
    token (terminal) until the list is empty or the input cannot be derived.

    Returns:
        ``(tk, expected)`` where ``tk`` is the current token when the loop
        stopped and ``expected`` is the list of token types that were
        acceptable at that point; ``expected`` is empty on success.
    """
    global derivation, lexer, grammar
    lexer = lx.Lexer(file_path)
    lexer.readFile()
    tk = lexer.nextToken()
    while derivation:
        a = derivation[0]

        if a in grammar.non_terminals:  # Expand
            expected = set().union(*grammar.predictions[a].values())
            if tk.token_type not in expected:
                # No production of `a` can start with this token.  Fall back
                # to `[a]` so a failure is never reported as an empty list.
                return tk, sorted(expected) or [a]

            production = getNewPrefix(a, tk.token_type)
            if production == ['e']:
                # The empty production contributes no symbols.
                production = []
            derivation = list(production) + derivation[1:]

        elif a == tk.token_type:  # Match
            tk = lexer.nextToken()
            derivation = derivation[1:]
        else:
            return tk, [a]  # The terminal `a` was expected but not found
    return tk, []  # it means it finished correctly

def execute():
    """Run the syntactic analysis and print its outcome.

    Resets the module-level ``derivation`` to the grammar's start symbol
    followed by ``$``, runs `derivate` and prints either the syntax error
    (position, found lexeme and expected tokens) or the success message.
    """
    global derivation, lexer, file_path
    # Start from the symbol the grammar was built with instead of a
    # hard-coded 'S', which may not be a non-terminal of that grammar.
    derivation = [grammar.init_symbol, '$']

    tk, answer = derivate()

    if len(answer):  # We have not found a proper derivation
        answer = str(answer).strip('[]')
        print('<{},{}> Error sintactico: se encontro>: "{}"; se esperaba: {}.'.format(tk.row, tk.col, get_lexeme(tk.lexeme), answer))
        return

    # we finished the code processing
    print('El analisis sintactico ha finalizado exitosamente.')


def main(grammar=None, derivation=None):
    """Entry point: load the token table, check for the main block and parse.

    Args:
        grammar: unused; kept (now optional) for backward compatibility.
        derivation: unused; kept (now optional) for backward compatibility.
    """
    global file_path, lexer
    # loadTkSymb requires the path of the token file.
    loadTkSymb(token_path)
    if not mainExists(file_path):
        print('Error sintactico: falta funcion_principal')
        return

    execute()

In [29]:
# Run the analysis on the configured input file.
main()

{'tk_par_izq': '(', 'tk_par_der': ')', 'tk_increment': '++', 'tk_decrement': '--', 'tk_puntero': '^', 'tk_bit-wise': '~', 'tk_mas': '+', 'tk_menos': '-', 'tk_direccion': '@', 'tk_num_llamadas': '?', 'tk_exp': '**', 'tk_multi': '*', 'tk_div': '/', 'tk_residuo': '%', 'tk_swap': ':=:', 'tk_conc': '||', 'tk_left_shift': '<<', 'tk_right_shift': '>>', 'tk_igual': '=', 'tk_coma': ',', 'tk_punto_y_coma': ';', 'tk_asig': ':=', 'tk_dos_puntos': ':', 'tk_punto': '.', 'tk_ejecuta': '->', 'tk_inc_asign': '+:=', 'tk_dec_asign': '-:=', 'tk_mult_asign': '*:=', 'tk_div_asign': '/:=', 'tk_rem_asign': '%:=', 'tk_exp_asign': '**:=', 'tk_or_asign': '|:=', 'tk_and_asign': '&:=', 'tk_concat_asign': '||:=', 'tk_left_shift_asign': '<<:=', 'tk_right_shift_asign': '>>:=', 'tk_distinto': '~=', 'tk_cor_izq': '[', 'tk_cor_der': ']', 'tk_menorque': '<', 'tk_mayorque': '>', 'tk_mayor_igual': '>=', 'tk_menor_igual': '<=', 'tk_separa': '[]', 'tk_llave_izq': '{', 'tk_llave_der': '}', 'tk_slice': '...', 'tk_div_sum': '/+

In [34]:
# Scratch check: shows the type of a list literal.
type([])

list

In [43]:
# Scratch check: how a list renders once its surrounding brackets are
# stripped, the same formatting `execute` applies to the expected tokens.
b = list({1,2,3,4})
str(b).strip('[]')

'1, 2, 3, 4'