In [254]:
import Lexer.Lexer as lx
import re

In [255]:
default_depth = 600
max_depth = default_depth

class SyntaxBuilder:
    def __init__(self,path_grammar,path_input, init_symbol = 'S'):
        self.path_grammar = path_grammar
        self.path_input = path_input
        
        self.init_symbol = init_symbol
        self.grammar = {}
        self.non_terminals = set()
        
        self.first = {}
        
        self.following = {}
        self.explored = set() #set to keep state of following
        
        self.predictions = {}
        self.getProd = []
        self.getId = {}
        
    def loadGrammar(self):
        f = open(self.path_grammar)
        lines = f.readlines()
        f.close()
        id_ = 0
        for line in lines:
            line = line.strip().split()
            if line == [] or line[0]=='//': continue
            if line[0] not in self.grammar:
                self.non_terminals.add(line[0])
                self.first[line[0]] = set()
                self.following[line[0]] = set()
                self.predictions[line[0]] = {}
                self.grammar[line[0]] = []
            
            self.getProd.append(line[1:])
            self.getId[str(line[1:])] = id_
            self.predictions[line[0]][id_] = set()
            self.grammar[line[0]].append(line[1:])
            id_+=1
        self.following[self.init_symbol] = {'$'} # Add to first symbol
        
    def primeros(self, v, precalc = False):
        global max_depth
        max_depth-=1
        
        if max_depth <=0 or len(v)==0:
            #print('MAX_DEPTH REACHED')
            max_depth+=1
            return {'e'}
        
        if len(v) == 1 and v[0]=='e':
            max_depth+=1
            return {'e'}
        
        if v[0] not in self.non_terminals:
            max_depth+=1
            return {v[0]}
        
        if len(v) == 1 and v[0] in self.non_terminals:
            #print('<<<<<',v[0])
            if precalc: return self.first[v[0]] # Used when we have already calculated it for non-terminals
            
            productions = self.grammar[v[0]] 
            first = set()
            for p in productions:
                first |= self.primeros(p)
            max_depth+=1
            self.first[v[0]] |= first
            return first
        
        first = self.primeros([v[0]])
        
        if 'e' in first:
            if len(v)>1:
                first.discard('e')
                first |= self.primeros(v[1:])
        max_depth+=1
        return first
    
    
    def siguiente(self, non_terminal): # S is the non-terminal
        global max_depth, default_depth
        
        self.explored.add(non_terminal)
        
        for production in self.grammar[non_terminal]:
            for i in range(len(production)):
                p = production[i]
                if p in self.non_terminals:
                    if p not in self.explored:
                        self.siguiente(p)
                    
                    max_depth = default_depth
                    first = self.primeros(production[i+1:])
                    
                    self.following[p] |= first - {'e'}
                    if 'e' in first:
                        self.following[p].add(non_terminal)
    def predict(self, S, prod):
        first = self.primeros(prod, False)
        if 'e' in first:
            first.discard('e')
            return first | self.following[S]
        else:
            return first
        
    def calcFirsts(self):
        global max_depth, default_depth
        for S in self.grammar:
            max_depth = 600
            self.primeros([S])
    
    def calcFollowing(self):
        
        for non_terminal in self.non_terminals:
            if non_terminal not in self.explored:
                self.siguiente(self.init_symbol)
        
        added = True #Placeholder, does nothing
        while added:
            added = False
            for non_terminal in self.non_terminals:
                current = self.following[non_terminal].copy()
                for element in self.following[non_terminal]:
                    if element in self.non_terminals:
                        to_add = self.following[element]
                        added = True
                        current |= to_add
                        current -= {non_terminal}
                        current -= {element}
                self.following[non_terminal] = current
    
    def calcPredictions(self):
        for k,productions in self.grammar.items():
            for production in productions:
                self.predictions[k][self.getId[str(production)]] = self.predict(k,production)
    
    def calculateAll(self):
        print("<<<INIT FIRST")
        self.calcFirsts()
        print(">>>FIRST DONE")
        print("<<<INIT FOLLOWING")
        self.calcFollowing()
        print(">>>FOLLOWING DONE")
        print("<<<INIT PREDICTIONS")
        self.calcPredictions()
        print(">>>PREDICTIONS DONE")

In [3]:
grammar_path = 'grammar.txt'
file_path = 'input.txt'
token_path = 'Lexer/tokens.txt'
reserved_path = 'Lexer/reserved.txt'
# initialize grammar and grammar sets
grammar = SyntaxBuilder(grammar_path,'S')
grammar.loadGrammar()

In [4]:
grammar.calculateAll()

<<<INIT FIRST
>>>FIRST DONE
<<<INIT FOLLOWING
>>>FOLLOWING DONE
<<<INIT PREDICTIONS
>>>PREDICTIONS DONE


In [235]:
token = lexer.nextToken()

In [238]:
token

''

In [237]:
token.token_type

AttributeError: 'str' object has no attribute 'token_type'

In [194]:
for k,v in grammar.predictions.items():
    print(k)
    for p,q in v.items():
        print(grammar.getProd[p],q)

S
['A', 'B', 'C', "S'"] {'dos', 'uno', 'cuatro', '$', 'tres'}
S'
['uno', "S'"] {'uno'}
['e'] set()
['e'] {'$'}
A
['dos', 'B', 'C'] {'dos'}
['e'] set()
['e'] {'$', 'uno', 'tres', 'cuatro'}
B
['C', 'tres'] {'tres', 'cuatro'}
['e'] set()
['e'] {'$', 'uno', 'tres', 'cuatro'}
C
['cuatro', 'B'] {'cuatro'}
['e'] {'$', 'uno', 'tres', 'cuatro'}


In [None]:
print(grammar.predictions)
print(grammar.getProd)

In [None]:
derivation = ["ENTRY", "$"]
lexer = lx.Lexer(file_path)
token_lexeme = {}

# ------ UTIL ----------

def get_lexeme(type_):
    global token_lexeme
    
    if type_ in token_lexeme: return token_lexeme[type_] #if token is tk_???
    return type_ # if token is reserved word

def loadTkSymb():
    global token_to_symb,token_path
    f = open(token_path)
    token_array = [x.strip().split('\t') for x in f.readlines()]
    f.close
    token_lexeme = {k:v for v,k in token_array}

# --------- MAIN ------------

def mainExists(file_path):
    # Here we find main on file
    lexer = lx.Lexer(file_path)
    lexer.readFile()
    tk = lexer.nextToken()
    while (tk.lexeme != '$'):
        if tk.lexeme == 'resource': return True
        tk = lexer.nextTokent()
    return False

def getNewPrefix(non_terminal, token_type):
    global grammar
    predictions = grammar.predictions[non_terminal]
    print("<<<PREDICTIONS: ")
    allTk = set()
    for i, prediction in predictions.items():
        allTk |= prediction
        print("<<<<<<<<<<<<<",grammar.getProd[i],prediction)
        if token_type in prediction:
            return grammar.getProd[i]
    
    return list(allTk) # In case we cannot solve the prefix
        

def derivate():
    global derivation, lexer, grammar
    lexer = lx.Lexer(file_path)
    lexer.readFile()
    tk = lexer.nextToken()
    prefix = []
    while(len(derivation)):
        print("-------------")
        a = derivation[0]
        print(">>>>",derivation)
        print("<<<TK: ",tk.parse())
        if a in grammar.non_terminals: # Expand
            new_prefix = getNewPrefix(a ,tk.token_type)
            derivation = new_prefix + derivation[1:]
            prefix = new_prefix
            print('')
        
        elif a == tk.token_type: # Match
            tk = lexer.nextToken()
            derivation = derivation[1:]
        else:
            print("-----",derivation)
            print("-----TK: ",tk.parse())
            return tk, prefix # It means we have unsatisfied expected values
    return tk, [] # it means it finished correctly

def execute():
    global derivation
    derivation = ['COMPONENT','$']
    
    tk, answer = derivate()
    
    if len(answer): # We have not found a proper derivation
        answer = str(answer).strip('[]')
        print('<{},{}> Error sintactico: se encontro>: "{}"; se esperaba: {}.'.format(tk.row, tk.col, get_lexeme(tk.lexeme), answer))
        return
    
    # we finished the code processing
    print('El analisis sintactico ha finalizado exitosamente.')


def main():
    global file_path, lexer
    loadTkSymb()
    if not mainExists(file_path):
        print('Error sintactico: falta funcion_principal')
        return
    
    execute()

In [None]:
main()

In [None]:
grammar.non_terminals

In [None]:
b = list({1,2,3,4})
str(b).strip('[]')

In [127]:
token

'<tk_asig,4,13>'