Cette feuille a pour but de travailler à la construction d'un AST.

Ne pas oublier :
lorsque l'AST est construit, vérifier :
    - qu'il y a 1 seul token equals
    - qu'il existe 1 partie droite
    - rajouter les prochains points à vérifier a posteriori

In [171]:
class AST(object):
    """Empty placeholder class; it defines no attributes or methods."""

class Node_letter(object):
    """AST leaf for a single letter.

    ``token`` and ``name`` are stored exactly as passed to the
    constructor; ``state`` is initialised to 0.
    """

    def __init__(self, token, name):
        self.token, self.name = token, name
        self.state = 0

    def __str__(self):
        """Return ``Node_letter(<token>)`` using the token's string form."""
        return "Node_letter(%s)" % (self.token,)
        
class Node_condition(object):
    """AST node joining a left and a right child with an operator token.

    Args:
        left: left child node.
        cond: operator token; stored under both ``token`` and ``cond``.
        right: right child node.
    """

    def __init__(self, left, cond, right):
        self.left = left
        # The operator is reachable under two names: ``token`` and ``cond``.
        self.token = self.cond = cond
        self.right = right
        # NOTE: no negation flag is stored on the node yet.

    def __str__(self):
        """Return ``Node_condition(<token>)``; children are not rendered."""
        # The closing parenthesis was missing from the original format string.
        return "Node_condition({})".format(self.token)
class Token(object):
    """A lexical token: a type tag plus the value it was built with."""

    def __init__(self, token_type, value):
        self.type, self.value = token_type, value

    def __str__(self):
        """Render the token as ``Token(<type>, <repr of value>)``.

        Examples:
            Token(LETTER, 'A')
            Token(AND, '+')
            Token(OR, '|')
            Token(XOR, '^')
            Token(EOL, None)
        """
        return "Token(%s, %r)" % (self.type, self.value)

    def __repr__(self):
        """Same text as ``__str__`` so tokens read well inside containers."""
        return str(self)

In [213]:
class Lexer(object):
    """Split one rule line into a stream of ``Token`` objects.

    Recognised input: upper-case letters ``A``-``Z``, the single-character
    symbols ``+ | ^ ! ( )``, and the multi-character symbols ``=>`` and
    ``<=>``.  Spaces are skipped; any other character raises an exception.
    """

    # Single-character symbols mapped to the token type they produce.
    _OPERATORS = {
        "+": "AND",
        "|": "OR",
        "^": "XOR",
        "!": "NOT",
        "(": "OPEN_PAR",
        ")": "CLOSE_PAR",
    }
    # Characters that are only valid as part of "=>" or "<=>".
    _EQUALS_CHARS = frozenset("=<>")
    # Multi-character symbols keyed by their first character:
    # (full symbol text, token type).
    _EQUALS_SYMBOLS = {
        "<": ("<=>", "ONLY_IF"),
        "=": ("=>", "IMPLIES"),
    }

    def __init__(self, line):
        self.rule = line

        self.pos = 0
        # An empty rule has no first character; indexing it unconditionally
        # raised IndexError before any token could be requested.
        self.current_char = self.rule[0] if self.rule else None

    def error(self, EOL=False):
        """Raise an ``Exception`` describing the offending position.

        Args:
            EOL: when true, report the end of the line instead of the
                character found at ``self.pos``.

        The end of the line is also reported when ``self.pos`` lies past the
        last character.  The index in the message is 1-based.
        """
        at_end = EOL or self.pos >= len(self.rule)
        found = "EOL" if at_end else self.rule[self.pos]
        raise Exception('Invalid character \'{}\' at index {}'.format(found, self.pos + 1))

    def get_next_token(self):
        """Generate the tokens of the rule, ending with ``Token("EOL", None)``.

        ``self.pos`` is set to the index of each character as it is examined
        (spaces included), so ``error()`` called while the generator is
        suspended points at the start of the token most recently yielded, or
        at the last character examined once ``EOL`` has been yielded.

        Raises:
            Exception: via ``error()`` on an unrecognised character or a
                malformed ``=>`` / ``<=>`` symbol.
        """
        len_rule = len(self.rule)
        i = 0
        while i < len_rule:
            self.pos = i
            char = self.rule[i]
            if char == " ":
                pass
            elif "A" <= char <= "Z":
                yield Token("LETTER", char)
            elif char in self._OPERATORS:
                yield Token(self._OPERATORS[char], char)
            elif char in self._EQUALS_CHARS:
                extra, token = self.get_equals_token(i, len_rule)
                # Skip the remaining characters of the multi-character symbol.
                i += extra
                yield token
            else:
                self.error()
            i += 1
        yield Token("EOL", None)

    def get_equals_token(self, i, len_rule):
        """Match ``=>`` or ``<=>`` starting at index ``i`` of the rule.

        Args:
            i: index in ``self.rule`` of a ``=``, ``<`` or ``>`` character.
            len_rule: length of ``self.rule``.

        Returns:
            A two-item list ``[extra, token]`` where ``extra`` is the number
            of characters consumed beyond the first one.

        Raises:
            Exception: via ``error()``, pointing at the first character that
                breaks the symbol, or at the end of the line when the rule
                stops in the middle of the symbol.
        """
        symbol, token_type = self._EQUALS_SYMBOLS.get(self.rule[i], (None, None))
        if symbol is None:
            # A lone ">" cannot start any symbol: it is itself the bad character.
            self.pos = i
            self.error()
        for offset, expected in enumerate(symbol):
            k = i + offset
            ran_out = k >= len_rule
            if ran_out or self.rule[k] != expected:
                self.pos = k
                self.error(EOL=ran_out)
        return [len(symbol) - 1, Token(token_type, symbol)]

Le principe du Parser repose sur la récursivité, en appliquant des priorités aux tokens rencontrés.

Dans notre problème, on peut distinguer 3 niveaux de priorité, du moins prioritaire au plus prioritaire :

* 1: le token equal. (=> ou <=>)
* 2: opérateurs : ^, |, +
* 3: lettres, parentheses

* À noter que la négation n'est pas encore bien intégrée à l'arbre


La récursivité est programmée comme ceci :
le niveau 1 appelle le niveau 2, qui appelle le niveau 3.

In [250]:
class Parser(object):
    """Recursive-descent parser that builds an AST from a lexer's tokens.

    Three precedence levels, loosest binding first:
        1. ``deep_one``   - ``IMPLIES`` and ``ONLY_IF``
        2. ``deep_two``   - ``AND``, ``OR`` and ``XOR`` (one shared level)
        3. ``deep_three`` - letters and parenthesised sub-rules

    Each level calls the next one for its operands.  Operators of the same
    level are combined left to right.

    TODO: negation is not parsed yet; its token is rejected by
    ``deep_three`` like any other unexpected token.
    """

    # Token types consumed as infix operators at each level.
    _LEVEL_ONE_TYPES = ("IMPLIES", "ONLY_IF")
    _LEVEL_TWO_TYPES = ("AND", "OR", "XOR")

    def __init__(self, lexer):
        self.lexer = lexer
        self.gen = self.lexer.get_next_token()
        # The parser always holds exactly one token of lookahead.
        self.current_token = next(self.gen)

    def error(self, s):
        """Raise an ``Exception`` with message ``"Invalid syntax : " + s``."""
        raise Exception("Invalid syntax : {}".format(s))

    def get_next_token(self, token_type):
        """Consume the current token, which must be of type ``token_type``.

        Raises:
            Exception: via ``error()`` when the current token has another type.
        """
        if self.current_token.type != token_type:
            self.error("could not find match of parenthesis")
        self.current_token = next(self.gen)

    def parse(self):
        """Start the recursive parse and return the root node of the AST.

        Raises:
            Exception: when tokens remain after a complete expression; the
                lexer reports the position of the leftover token.
        """
        node = self.deep_one()
        if self.current_token.type != "EOL":
            # lexer.error() raises by itself, so no wrapping call is needed.
            self.lexer.error()
        return node

    def _binary_level(self, operand, token_types):
        """Parse ``operand (op operand)*`` for the operators in ``token_types``."""
        node = operand()
        while self.current_token.type in token_types:
            token = self.current_token
            self.get_next_token(token.type)
            node = Node_condition(left=node, cond=token, right=operand())
        return node

    def deep_one(self):
        """Parse the loosest-binding level.

        Handles:
            Token(IMPLIES, '=>')
            Token(ONLY_IF, '<=>')
        """
        return self._binary_level(self.deep_two, self._LEVEL_ONE_TYPES)

    def deep_two(self):
        """Parse the middle level.

        Handles:
            Token(AND, '+')
            Token(OR, '|')
            Token(XOR, '^')
        """
        return self._binary_level(self.deep_three, self._LEVEL_TWO_TYPES)

    def deep_three(self):
        """Parse the tightest-binding level.

        Handles:
            Token(LETTER, 'A'..'Z')
            Token(OPEN_PAR, '(') ... Token(CLOSE_PAR, ')')

        Raises:
            Exception: when the rule ends where an operand is required, or
                when any other token type is found.
        """
        token = self.current_token
        if token.type == "LETTER":
            self.get_next_token("LETTER")
            return Node_letter(token, token.value)
        if token.type == "OPEN_PAR":
            self.get_next_token("OPEN_PAR")
            # A parenthesised group restarts at the loosest level.
            node = self.deep_one()
            self.get_next_token("CLOSE_PAR")
            return node
        if token.type == "EOL":
            # The lexer position still points at the last character it
            # examined, so its own message would blame the wrong character.
            self.error("unexpected end of rule")
        self.lexer.error()

In [252]:
# Demo cell: lex and parse one sample rule, then print the root node of the
# resulting AST.  Any exception raised along the way is printed instead of
# propagating.
rule = "A+B+(C => C)"
try:
    lexer = Lexer(rule)
    parser = Parser(lexer)
    root = parser.parse()
    print(root)
except Exception as e:
    print(e)

Node_condition(Token(AND, '+')


In [238]:
# Print the root node currently bound to `root` (assigned by the parsing cell).
print(root)

Node_condition(Token(ONLY_IF, '<=>')
