In [624]:
class MathJSON:
    def __init__(self, dict):
        self.dict = dict
        
    def __add__(self, other):
        return MathJSON({ 'ADD' : [self.dict, other.dict] })
    
    def __mul__(self, other):
        return MathJSON({ 'MUL' : [self.dict, other.dict] })
    
    def __sub__(self, other):
        return MathJSON({ 'MINUS' : [self.dict, other.dict] })

    def __pow__(self, other):
        return MathJSON({ 'POW' : [self.dict, other.dict] })
    
    def __truediv__(self, other):
        return MathJSON({ 'DIV' : [self.dict, other.dict] })

    def func(self, variables):
        return MathJSON({ 'FUNCTION' : [self.dict, variables.dict]})

    def integrate(self, measure):
        return MathJSON({ 'INT' : [self.dict, measure.dict ]})

    def differentiate(self, measure):
        return MathJSON({ 'DIFF' : [self.dict, measure.dict ]})

In [625]:
from enum import Enum
from dataclasses import dataclass
import re
from more_itertools import peekable

class TokenType(Enum):
    NUMBER = 0
    PLUS = 1
    MINUS = 2
    MULTIPLY = 3
    DIVIDE = 4
    VARIABLE = 5
    OPERATOR = 6
    OBJECT = 7
    LPAREN = 8
    RPAREN = 9
    TENSOR = 10
    FLOAT = 11
    INTEGER = 12
    INTEGRAL = 13
    DIFFERENTIAL = 14
    SOLVER = 15
    FUNCTION = 16
    EQUALS = 17
    COMMA = 18
    POW = 19

@dataclass
class Token:
    type: TokenType
    value: any

    def __repr__(self):
        return self.type.name + (f":{self.value}" if self.value != None else "")

In [626]:
def match_tensors(i):
    string = i
    rank = string.count('_') + string.count('^')
    if rank > 0:
        pattern = lambda x : "([a-zA-Z]+)([_^]\{[a-zA-Z]+\}|[_^]\{[a-zA-Z]+\=[0-9]}){" + str(x) + "}(?=(\*|\)|\+|\-|\/|$))"
        Total = re.match(pattern(rank), string)
        return bool(Total)
    else:
        return False

match_tensors('integrate_{}')

False

In [652]:
WHITESPACE = ' \n\t'
DIGITS = '0987654321'
LOWERCASES = 'abcdefghijklmnopqrstuvwxyz'
UPPERCASES = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
CHARS = UPPERCASES + LOWERCASES + DIGITS
CHARACTERS = '{}[]_^=.:'
OBJECT_CHARACTERS = CHARACTERS + UPPERCASES + LOWERCASES + DIGITS

re_float           = '^(\d+)(\.)(\d+)$'
re_integer         = '^(\d+)$'
regex_integral     = '^(integrate)$'
regex_diff         = '^(diff)$'
regex_solve        = '^(solve)$'
re_variable        = '[a-z]+'

def match_tensors(i):
    string = i
    rank = string.count('_') + string.count('^')
    if rank > 0:
        pattern = lambda x : "([a-zA-Z]+)([_^]\{[a-zA-Z]+\}|[_^]\{[a-zA-Z]+\=[0-9]}){" + str(x) + "}(?=(\*|\)|\+|\-|\/|$))"
        Total = re.match(pattern(rank), string)
        return bool(Total)
    else:
        return False

class Lexer:
    def __init__(self, text):
        self.text = peekable(text)
        self.advance()

    def advance(self):
        try:
            self.current_char = next(self.text)
        except StopIteration:
            self.current_char = None

    def generate_tokens(self):
        while self.current_char != None:
            # If the current character is a empty space or new line, then move on to the next character.
            if self.current_char in WHITESPACE:
                self.advance()
            # If the current character is a . or a digit, then we should keep iterating through while its still a number.
            # When we do not have a .09090 OR 1212.1313 OR 23213 when we stop and returns a number Token, then continue the lexer.
            elif self.current_char == '.' or self.current_char in CHARS:
                yield self.generate_object()
            elif self.current_char == '+':
                self.advance()
                yield Token(TokenType.PLUS, None)
            elif self.current_char == '*':
                yield self.generate_operation()
            elif self.current_char == '-':
                self.advance()
                yield Token(TokenType.MINUS, None)
            elif self.current_char == '/':
                self.advance()
                yield Token(TokenType.DIVIDE, None)
            elif self.current_char == '(':
                self.advance()
                yield Token(TokenType.LPAREN, None)
            elif self.current_char == ')':
                self.advance()
                yield Token(TokenType.RPAREN, None)
            elif self.current_char == '=':
                self.advance()
                yield Token(TokenType.EQUALS, None)
            elif self.current_char == ',':
                self.advance()
                yield Token(TokenType.COMMA, None)
            else:
                raise Exception(f"Illegal Character '{self.current_char}'")

    def generate_operation(self):
        num = ''
        while self.current_char != None and self.current_char == '*':
            num += self.current_char
            self.advance()
        if num.count('*') == 1:
            return Token(TokenType.MULTIPLY, None)
        elif num.count('*') == 2:
            return Token(TokenType.POW, None)
        else:
            raise Exception(f"Illegal Character '{num}'")

    def generate_object(self):
        obj_str = self.current_char
        self.advance()
        while self.current_char != None and self.current_char in OBJECT_CHARACTERS:
            if self.current_char in CHARS and self.text.peek() == '(':
                obj_str += self.current_char
                self.advance()
                if re.match(regex_integral, obj_str):
                    return Token(TokenType.INTEGRAL, obj_str)
                elif re.match(regex_diff, obj_str):
                    return Token(TokenType.DIFFERENTIAL, obj_str)
                elif re.match(regex_solve, obj_str):
                    return Token(TokenType.SOLVER, obj_str)
                else:
                    return Token(TokenType.FUNCTION, obj_str)
            else:
                obj_str += self.current_char
                self.advance()

        if re.match(re_integer, obj_str):
            return Token(TokenType.INTEGER, int(obj_str))

        elif re.match(re_variable, obj_str):
            return Token(TokenType.VARIABLE, obj_str)

        elif re.match(re_float, obj_str):
            return Token(TokenType.FLOAT, float(obj_str))

        elif match_tensors(obj_str):
            return Token(TokenType.TENSOR, obj_str)

        elif self.current_char == None:
            return Token(TokenType.INTEGER, int(obj_str))


tokens = list(Lexer('3 * integrate(integrate(3*x) + Metric_{d=2}_{d})').generate_tokens())
print(list(Lexer('12+10').generate_tokens()))

RuntimeError: generator raised StopIteration

In [640]:
@dataclass
class PlusNode:

    def __init__(self, node):
        self.node = node
        self.dict = { "POSITIVE" : str(self.node)}

    def __repr__(self):
        return f"(+{self.node})"

@dataclass
class MinusNode:

    def __init__(self, node):
        self.node = node
        self.dict = { "NEGATIVE" : str(self.node)}

    def __repr__(self):
        return f"(-{self.node})"

@dataclass
class TensorNode:
    
    def __init__(self, value):
        self.value = value
        self.dict = { "TENSOR" : str(self.value)}

    def __repr__(self):
        return f"({self.value})"

@dataclass
class VariableNode:

    def __init__(self, value):
        self.value = value
        self.dict = { "VAR" : str(self.value)}

    def __repr__(self):
        return f"{self.value}"

@dataclass
class IntNode:

    def __init__(self, value):
        self.value = value
        self.dict = { "NUMBER_INT" : str(self.value)}

    def __repr__(self):
        return f"({self.value})"

@dataclass
class FloatNode:

    def __init__(self, value):
        self.value = value
        self.dict = { "NUMBER_FLOAT" : str(self.value)}

    def __repr__(self):
        return f"({self.value})"

@dataclass
class AddNode:
    def __init__(self, node_a, node_b):
        self.node_a = node_a
        self.node_b = node_b
        self.dict = { "ADD" : [self.node_a, self.node_b]}


    def __repr__(self):
        return f"({self.node_a} + {self.node_b})"

@dataclass
class SubNode:

    def __init__(self, node_a, node_b):
        self.node_a = node_a
        self.node_b = node_b
        self.dict = { "MINUS" : [self.node_a, self.node_b]}

    def __repr__(self):
        return f"({self.node_a} - {self.node_b})"

@dataclass
class MulNode:

    def __init__(self, node_a, node_b):
        self.node_a = node_a
        self.node_b = node_b
        self.dict= { "MULTIPLY" : [self.node_a, self.node_b]}

    def __repr__(self):
        return f"({self.node_a} * {self.node_b})"

@dataclass
class PowNode:

    def __init__(self, node_a, node_b):
        self.node_a = node_a
        self.node_b = node_b
        self.dict= { "POW" : [self.node_a, self.node_b]}

    def __repr__(self):
        return f"({self.node_a} ^ {self.node_b})"

@dataclass
class DivNode:
    def __init__(self, node_a, node_b):
        self.node_a = node_a
        self.node_b = node_b
        self.dict = { "DIV" : [self.node_a, self.node_b]}

    def __repr__(self):
        return f"({self.node_a} / {self.node_b})"

@dataclass
class IntegrateNode:

    def __init__(self, node_a, node_b):
        self.node_a = node_a
        self.node_b = node_b
        self.dict = { "INT" : [self.node_a, self.node_b]}

    def __repr__(self):
        return f"integrate({self.node_a}, {self.node_b})"

@dataclass
class DifferentialNode:

    def __init__(self, node_a, node_b):
        self.node_a = node_a
        self.node_b = node_b
        self.dict = { "DIFF" : [self.node_a, self.node_b]}

    def __repr__(self):
        return f"differential({self.node_a}, {self.node_b})"

@dataclass
class EqualsNode:

    def __init__(self, node_a, node_b):
        self.node_a = node_a
        self.node_b = node_b
        self.dict = { "EQUALITY" : [self.node_a, self.node_b]}

    def __repr__(self):
        return f"{self.node_a} = {self.node_b}"

In [643]:
class Parser:
    def __init__(self, tokens):
        self.tokens = peekable(tokens)
        self.advance()

    def raise_error(self):
        raise Exception("Invalid Syntax")

    def advance(self):
        try:
            self.current_token = next(self.tokens)
        except StopIteration:
            self.current_token = None

    def parse(self):
        if self.current_token == None:
            return None
        result = self.expr()
        if self.current_token != None:
            self.raise_error()
        return result

    def find_variables(self):
        tokens = []
        if self.current_token.type != TokenType.VARIABLE:
            self.raise_error()
        tokens.append(self.object())
        while self.current_token != None and self.current_token.type == TokenType.COMMA:
            self.advance()
            if self.current_token.type != TokenType.VARIABLE:
                self.raise_error()
            tokens.append(self.object())
        return tokens

    def expr(self):
        """
        The expression function will look for expressions, which itself have the structure and rules we define here.
        Steps:

            1. Look for a term, call it X
            2. Advance ---->
            3. If '+' or '-' 
            4. Advance ---->
            5. Look for another term, call it Y
            6. Create a PlusNode or a MinusNode representing X +/- Y
        """
        # First we assign a self.term() which will call the term functions, whose job it is to look for a term.
        # We store that term object in the result variable.
        # This is important as we first need to look for the term which acts as the X in the expr: X (+|-) Y
        X = self.term()
        # The self.term() will end with a self.advance() and if this method is called it will then see if the next is a plus or minus:
        while self.current_token != None and self.current_token.type in (TokenType.PLUS, TokenType.MINUS):
            if self.current_token.type == TokenType.PLUS:
                # If we arrive at a + operator, we first advance to the next token and call the self.term() once again,
                # This new self.term() is now finding the Y in the expr: X (+|-) Y as currently we were in the + token -> advance
                self.advance()
                X = AddNode(X, self.term())
            elif self.current_token.type == TokenType.MINUS:
                self.advance()
                X = SubNode(X, self.term())
        return X

    def term(self):
        # First we assign a self.term() which will call the term functions, whose job it is to look for a term.
        # We store that term object in the result variable.
        # This is important as we first need to look for the term which acts as the X in the expr: X (*|/) Y
        result = self.power()
        # The self.term() will end with a self.advance() and if this method is called it will then see if the next is a plus or minus:
        while self.current_token != None and self.current_token.type in (TokenType.MULTIPLY, TokenType.DIVIDE):
            if self.current_token.type == TokenType.MULTIPLY:
                # If we arrive at a + operator, we first advance to the next token and call the self.term() once again,
                # This new self.term() is now finding the Y in the expr: X (+|-) Y as currently we were in the + token -> advance
                self.advance()
                result = MulNode(result, self.power())
            elif self.current_token.type == TokenType.DIVIDE:
                self.advance()
                result = DivNode(result, self.power())
        return result

    def power(self):
        """
        Grammar:

            power : object ((POW) expr)
        """
        result = self.object()
        # The self.term() will end with a self.advance() and if this method is called it will then see if the next is a plus or minus:
        while self.current_token != None and self.current_token.type == TokenType.POW:
            # If we arrive at a + operator, we first advance to the next token and call the self.term() once again,
            # This new self.term() is now finding the Y in the expr: X (+|-) Y as currently we were in the + token -> advance
            self.advance()
            result = PowNode(result, self.object())
        return result

    def object(self):
        token = self.current_token
        if token.type == TokenType.LPAREN:
            self.advance()
            result = self.expr()
            if self.current_token.type != TokenType.RPAREN:
                self.raise_error()
            self.advance()
            return result
        elif token.type == TokenType.FLOAT:
            self.advance()
            return FloatNode(token.value)
        elif token.type == TokenType.INTEGER:
            self.advance()
            return IntNode(token.value)
        elif token.type == TokenType.TENSOR:
            self.advance()
            return TensorNode(token.value)
        elif token.type == TokenType.VARIABLE:
            self.advance()
            return VariableNode(token.value)
        elif token.type == TokenType.PLUS:
            self.advance()
            return PlusNode(self.object())
        elif token.type == TokenType.MINUS:
            self.advance()
            return MinusNode(self.object())
        # NEEDS IMPROVEMENT: From here on, there woll be a lot of repeated code, which can be wraped in a single function call.
        elif token.type == TokenType.INTEGRAL:
            self.advance()
            if self.current_token.type != TokenType.LPAREN:
                self.raise_error()
            self.advance()
            expression_to_integrate = self.expr()
            if self.current_token.type != TokenType.COMMA:
                self.raise_error()
            self.advance()
            wrt_variables = self.find_variables()
            if self.current_token.type != TokenType.RPAREN:
                self.raise_error()
            self.advance()
            return IntegrateNode(expression_to_integrate, wrt_variables)
        elif token.type == TokenType.DIFFERENTIAL:
            self.advance()
            if self.current_token.type != TokenType.LPAREN:
                self.raise_error()
            self.advance()
            expression_to_integrate = self.expr()
            if self.current_token.type != TokenType.COMMA:
                self.raise_error()
            self.advance()
            wrt_variables = self.find_variables()
            if self.current_token.type != TokenType.RPAREN:
                self.raise_error()
            self.advance()
            return DifferentialNode(expression_to_integrate, wrt_variables)
        self.raise_error()


text = '.16+12'

In [644]:
lexer = Lexer(text)
tokens = lexer.generate_tokens()
list(tokens)

RuntimeError: generator raised StopIteration

In [636]:
lexer = Lexer(text)
tokens = lexer.generate_tokens()
parser = Parser(tokens)
tree = parser.parse()
tree

((10) + (2))

In [637]:
class Interpreter:

    def compute(self, node):
        if type(node).__name__ == 'AddNode':
            return MathJSON(node.node_a.dict)+MathJSON(node.node_a.dict)

    def visit(self, node):
        if isinstance(node, list):
            return [getattr(self, f"visit_{type(i).__name__}")(i).dict for i in node]
        else:
            method_name = f"visit_{type(node).__name__}"
            method = getattr(self, method_name)
            return method(node)

    def visit_IntNode(self, node):
        return MathJSON(node.dict)

    def visit_FloatNode(self, node):
        return MathJSON(node.dict)

    def visit_TensorNode(self, node):
        return MathJSON(node.dict)

    def visit_VariableNode(self, node):
        return MathJSON(node.dict)

    def visit_PowNode(self, node):
        return MathJSON(self.visit(node.node_a).dict) ** MathJSON(self.visit(node.node_b).dict)

    def visit_AddNode(self, node):
        return MathJSON(self.visit(node.node_a).dict) + MathJSON(self.visit(node.node_b).dict)

    def visit_SubNode(self, node):
        return MathJSON(self.visit(node.node_a).dict) - MathJSON(self.visit(node.node_b).dict)

    def visit_MulNode(self, node):
        return MathJSON(self.visit(node.node_a).dict) * MathJSON(self.visit(node.node_b).dict)

    def visit_DivNode(self, node):
        return MathJSON(self.visit(node.node_a).dict) / MathJSON(self.visit(node.node_b).dict)

    def visit_DifferentialNode(self, node):
        return MathJSON(self.visit(node.node_a).dict).differentiate(MathJSON(self.visit(node.node_b)))

    def visit_IntegrateNode(self, node):
        return MathJSON(self.visit(node.node_a).dict).integrate(MathJSON(self.visit(node.node_b)))


inter = Interpreter()
value = inter.visit(tree)
value.dict


{'ADD': [{'NUMBER_INT': '10'}, {'NUMBER_INT': '2'}]}

# CONSTANTS

In [623]:
DIGITS = '0987654321'
LETTERS = 'qwertyuioplkjhgfdsazxcvbnmQWERTYUIOPLKJHGFDSAZXCVBNM'
OBJECT_BRACKETS = '[]{}'
OBJECT = LETTERS + DIGITS + OBJECT_BRACKETS + OBJECT_CHARACTERS

# TOKENS

# POSITION

# PARSER

In [91]:
class Position:
    def __init__(self, index):
        self.index = index

    def advance(self):
        self.index += 1

    def copy(self):
        return Position(self.index)

# LEXER

# ERROR (Outdated)

In [132]:
class Error:
    def __init__(self, pos_start, pos_end, error_name, details):
        self.pos_start = pos_start
        self.pos_end = pos_end
        self.error_name = error_name
        self.details = details

    def as_string(self):
        result = f'{self.error_name}: {self.details}'
        return result

class IllegalCharError(Error):
    def __init__(self, details):
        super().__init__('Illegal Character', details)
