In [129]:

import re
from collections import namedtuple

In [130]:
def tokenize(string):
    string = string.replace(' ', '')
    return [ i for i in re.finditer('(\+|\-|((?<=[^a-z])\((?=[^a-z]))|((?<=[^a-z])\)(?=[^a-z]))|\*|\/)', string) if i]
input_box = '(1/2)*function(my, god, it, works)+ G^{a}^{b}*(D_{c}*G_{b}_{d} + D_{d}*G_{b}_{c} - D_{b}*G_{c}_{d})'
tokenize(input_box)

[<re.Match object; span=(2, 3), match='/'>,
 <re.Match object; span=(4, 5), match=')'>,
 <re.Match object; span=(5, 6), match='*'>,
 <re.Match object; span=(31, 32), match='+'>,
 <re.Match object; span=(41, 42), match='*'>,
 <re.Match object; span=(42, 43), match='('>,
 <re.Match object; span=(48, 49), match='*'>,
 <re.Match object; span=(58, 59), match='+'>,
 <re.Match object; span=(64, 65), match='*'>,
 <re.Match object; span=(74, 75), match='-'>,
 <re.Match object; span=(80, 81), match='*'>]

In [131]:
class StringToMathJson:
    def __init__(self, string):
        self.string = string
        self.OpBehaviour = namedtuple('OpBehaviour', 'priority lmbd')
        self.operations = {"+": self.OpBehaviour(0, lambda x, y: y+x),
                           "-": self.OpBehaviour(0, lambda x, y: y-x),
                           "/": self.OpBehaviour(1, lambda x, y: y/x),
                           "*": self.OpBehaviour(1, lambda x, y: y*x),
                           "^": self.OpBehaviour(2, lambda x, y: y**x)}

    def tokenize(self):
        string = self.string.replace(' ', '')
        return [ i for i in re.split('(\+|\-|\(|\)|\*|\/)', string) if i]

    def match_tensors(self, i):
        string = i
        pattern = lambda x : "([a-zA-Z]+)([_^]\{[a-zA-Z]+\}|[_^]\{[a-zA-Z]+\=[0-9]}){" + str(x) + "}(?=(\*|\)|\+|\-|\/|$))"
        Total = [[x for x in re.finditer(pattern(j), string)] for j in range(1, 11)]
        return [tensor.group() for nested in Total for tensor in nested]

    def match_operators(self, i):
        """
            Make checks as to what the name of the function is:
                - Integrate
                - solve
                - diff
                - subs
                or just declared functions
                - f(x)
                - etc...
            Then, make checks on the names and structure of the parameters.
            And finally parse the object into the relevant sympy object and return it into the MathJSON object.
        """
        Input = i.replace(' ', '')
        Function = '(?<![a-zA-Z])' + '([a-zA-Z]+)' + '(\(([a-z]+\))' + '|' + '\(([a-z]+\,)*[a-z]\))'
        return [x for x in re.finditer(Function, Input)]

    def json_wrapped_token(self):
        tokens = self.tokenize()
        for i in range(len(tokens)):
            if tokens[i] not in ['+','-','/','(',')','*']:
                if bool(self.match_operators(tokens[i])):
                    tokens[i] = MathJSON({"operators" : tokens[i]})
                elif bool(self.match_tensors(tokens[i])):
                    tokens[i] = MathJSON({"tensor_string_representation" : tokens[i]})
                elif bool(re.match('[0-9]+', tokens[i].replace('.','',1))):
                    tokens[i] = MathJSON({"number" : tokens[i]})
        return tokens

    def to_rpn(self):
        tokens = self.json_wrapped_token()
        rpn_tokens = []
        op_stack = []

        for token in tokens:
            # Add number to rpn tokens
            if isinstance(token, MathJSON):
                rpn_tokens.append(token)
            # Add opening bracket to operation stack
            elif token == "(":
                op_stack.append(token)
            # Consumes all operations until matching opening bracket
            elif token == ")":
                while op_stack[-1] != "(":
                    rpn_tokens.append(op_stack.pop())
                op_stack.pop()
            elif token in list(self.operations.keys()):
                try:
                    # Check if we have operations that have higher priority on
                    # the op_stack and add them to rpn_tokens so that they are evaluated first:
                    token_priority = self.operations[token].priority
                    while op_stack[-1] != "(" and self.operations[op_stack[-1]].priority >= token_priority:
                        rpn_tokens.append(op_stack.pop())
                except IndexError:  # op_stack is empty
                    pass
                # Add the current operation to the op_stack:
                op_stack.append(token)

        # Add remaining operations to rpn tokens
        while len(op_stack) != 0:
            rpn_tokens.append(op_stack.pop())

        return rpn_tokens

    def calculate(self):
        rpn_tokens = self.to_rpn()
        val_stack = []

        for token in rpn_tokens:
            if isinstance(token, MathJSON):
                val_stack.append(token)
            elif token in list(self.operations.keys()):
                args = []
                for x in range(self.operations[token].lmbd.__code__.co_argcount):
                    # If this throws an error user didn't give enough args
                    args.append(val_stack.pop())
                result = self.operations[token].lmbd(*args)
                val_stack.append(result)

        # If the value stack is bigger than one we probably made an error with the input
        assert len(val_stack) == 1
        return val_stack[0]



input_box = '(1/2)*function(my, god, it, works)+ G^{a}^{b}*(D_{c}*G_{b}_{d} + D_{d}*G_{b}_{c} - D_{b}*G_{c}_{d})'
StringToMathJson(input_box).json_wrapped_token()

['(',
 <__main__.MathJSON at 0x107d5da00>,
 '/',
 <__main__.MathJSON at 0x107d5d880>,
 ')',
 '*',
 'function',
 '(',
 'my,god,it,works',
 ')',
 '+',
 <__main__.MathJSON at 0x107d5d8e0>,
 '*',
 '(',
 <__main__.MathJSON at 0x107d5d040>,
 '*',
 <__main__.MathJSON at 0x107d5d220>,
 '+',
 <__main__.MathJSON at 0x107d5d1c0>,
 '*',
 <__main__.MathJSON at 0x107d5d160>,
 '-',
 <__main__.MathJSON at 0x107d5d310>,
 '*',
 <__main__.MathJSON at 0x107d5d2e0>,
 ')']

In [132]:
class StringToMathJson:
    def __init__(self, string):
        self.string = string
        self.OpBehaviour = namedtuple('OpBehaviour', 'priority lmbd')
        self.operation_weight = { "+" : 0, "-" : 0, "/" : 1, "*" : 1, "^" : 2}
        self.operations = {"+": self.OpBehaviour(0, lambda x, y: y+x),
                           "-": self.OpBehaviour(0, lambda x, y: y-x),
                           "/": self.OpBehaviour(1, lambda x, y: y/x),
                           "*": self.OpBehaviour(1, lambda x, y: y*x),
                           "^": self.OpBehaviour(2, lambda x, y: y**x)}

    def tokenize(self):
        string = self.string.replace(' ', '')
        return [ i for i in re.split('(\+|\-|\(|\)|\*|\/)', string) if i]

    def match_tensors(self, i):
        string = i
        pattern = lambda x : "([a-zA-Z]+)([_^]\{[a-zA-Z]+\}|[_^]\{[a-zA-Z]+\=[0-9]}){" + str(x) + "}(?=(\*|\)|\+|\-|\/|$))"
        Total = [[x for x in re.finditer(pattern(j), string)] for j in range(1, 11)]
        return [tensor.group() for nested in Total for tensor in nested]

    def match_operators(self, i):
        """
            Make checks as to what the name of the function is:
                - Integrate
                - solve
                - diff
                - subs
                or just declared functions
                - f(x)
                - etc...
            Then, make checks on the names and structure of the parameters.
            And finally parse the object into the relevant sympy object and return it into the MathJSON object.
        """
        Input = i.replace(' ', '')
        Function = '(?<![a-zA-Z])' + '([a-zA-Z]+)' + '(\(([a-z]+\))' + '|' + '\(([a-z]+\,)*[a-z]\))'
        return [x for x in re.finditer(Function, Input)]

    def json_wrapped_token(self):
        tokens = self.tokenize()
        for i in range(len(tokens)):
            if tokens[i] not in ['+','-','/','(',')','*']:
                if bool(self.match_operators(tokens[i])):
                    tokens[i] = MathJSON({"operators" : tokens[i]})
                elif bool(self.match_tensors(tokens[i])):
                    tokens[i] = MathJSON({"tensor_string_representation" : tokens[i]})
                elif bool(re.match('[0-9]+', tokens[i].replace('.','',1))):
                    tokens[i] = MathJSON({"number" : tokens[i]})
        return tokens

    def to_rpn(self):
        tokens = self.json_wrapped_token()
        rpn_tokens = []
        op_stack = []

        for token in tokens:
            # Add number to rpn tokens
            if isinstance(token, MathJSON):
                rpn_tokens.append(token)
            # Add opening bracket to operation stack
            elif token == "(":
                op_stack.append(token)
            # Consumes all operations until matching opening bracket
            elif token == ")":
                while op_stack[-1] != "(":
                    rpn_tokens.append(op_stack.pop())
                op_stack.pop()
            elif token in list(self.operations.keys()):
                try:
                    # Check if we have operations that have higher priority on
                    # the op_stack and add them to rpn_tokens so that they are evaluated first:
                    token_priority = self.operation_weight[token]
                    while op_stack[-1] != "(" and self.operation_weight[op_stack[-1]] >= token_priority:
                        rpn_tokens.append(op_stack.pop())
                except IndexError:  # op_stack is empty
                    pass
                # Add the current operation to the op_stack:
                op_stack.append(token)

        # Add remaining operations to rpn tokens
        while len(op_stack) != 0:
            rpn_tokens.append(op_stack.pop())

        return rpn_tokens

    def calculate(self):
        rpn_tokens = self.to_rpn()
        val_stack = []

        for token in rpn_tokens:
            if isinstance(token, MathJSON):
                val_stack.append(token)
            elif token in list(self.operations.keys()):
                args = []
                for x in range(self.operation_weight[token]):
                    # If this throws an error user didn't give enough args
                    args.append(val_stack.pop())
                result = self.operations[token].lmbd(*args)
                val_stack.append(result)

        # If the value stack is bigger than one we probably made an error with the input
        assert len(val_stack) == 1
        return val_stack[0]



input_box = '(1/2)*G^{a}^{b}*(D_{c}*G_{b}_{d} + D_{d}*G_{b}_{c} - D_{b}*G_{c}_{d})'
StringToMathJson(input_box).calculate().objectJson

TypeError: <lambda>() missing 1 required positional argument: 'y'

In [105]:
operations = {"+": lambda x, y: y+x,
              "-":  lambda x, y: y-x,
              "/": lambda x, y: y/x,
              "*": lambda x, y: y*x,
              "^": lambda x, y: y**x}
list(operations.keys())
bool(re.match('[0-9]+', '123|'))

True

In [99]:
OpBehaviour = namedtuple('OpBehaviour', 'priority lmbd')

operations = {
    "+": OpBehaviour(0, lambda x, y: y+x),
    "-": OpBehaviour(0, lambda x, y: y-x),
    "/": OpBehaviour(1, lambda x, y: y/x),
    "*": OpBehaviour(1, lambda x, y: y*x),
    "^": OpBehaviour(2, lambda x, y: y**x),
}

operations["/"].lmbd.__code__.co_argcount


2

In [61]:
def tokenize(string):
    string = string.replace(" ", "")

    # What the fuck?
    float_regex = "\d*\.?\d+|[\(\)]"
    for key in operations.keys():
        if len(key) == 1:
            float_regex += "|" + "[\\" + key + "]"
        else:
            float_regex += "|" + key
        # "|".join(operations)
        # pass
    rgx = re.compile(float_regex)

    results = rgx.finditer(string)
    exprList = []
    for reg in results:
        start, end = reg.span()
        exprList.append(string[start: end])
    return exprList

tokens = tokenize("1+7*(3+2)")
tokens

error: nothing to repeat at position 37

In [96]:

class MathJSON:
    def __init__(self, objectJson):
        self.objectJson = objectJson
        
    def __add__(self, other):
        return MathJSON({ 'Add' : [self.objectJson, other.objectJson] })
    
    def __mul__(self, other):
        return MathJSON({ 'Mul' : [self.objectJson, other.objectJson] })
    
    def __sub__(self, other):
        return MathJSON({ 'Sub' : [self.objectJson, other.objectJson] })
    
    def __truediv__(self, other):
        return MathJSON({ 'Div' : [self.objectJson, other.objectJson] })

    def __eq__(self, other):
        return MathJSON({ 'Eq' : [self.objectJson, other.objectJson] })

parsed_tokens = [
                '(',
                 MathJSON({'integer' : '1'}),
                 '/',
                 MathJSON({'integer' : '2'}),
                 ')',
                 '*',
                 MathJSON({'tensor_string_representation' : 'G^{a}^{b}'}),
                 '*',
                 '(',
                 MathJSON({'tensor_string_representation' : 'D_{c}'}),
                 '*',
                 MathJSON({'tensor_string_representation' : 'G_{b}_{d}'}),
                 '+',
                 MathJSON({'tensor_string_representation' : 'D_{d}'}),
                 '*',
                 MathJSON({'tensor_string_representation' : 'G_{b}_{c}'}),
                 '-',
                 MathJSON({'tensor_string_representation' : 'D_{b}'}),
                 '*',
                 MathJSON({'tensor_string_representation' : 'G_{c}_{d}'}),
                 ')'
                 ]

In [97]:
def to_rpn(tokens):
    rpn_tokens = []
    op_stack = []

    for token in tokens:
        # Add number to rpn tokens
        if isinstance(token, MathJSON):
            rpn_tokens.append(token)
        # Add opening bracket to operation stack
        elif token == "(":
            op_stack.append(token)
        # Consumes all operations until matching opening bracket
        elif token == ")":
            while op_stack[-1] != "(":
                rpn_tokens.append(op_stack.pop())
            op_stack.pop()
        elif token in list(operations.keys()):
            try:
                # Check if we have operations that have higher priority on
                # the op_stack and add them to rpn_tokens so that they are evaluated first:
                token_priority = operations[token].priority
                while op_stack[-1] != "(" and operations[op_stack[-1]].priority >= token_priority:
                    rpn_tokens.append(op_stack.pop())
            except IndexError:  # op_stack is empty
                pass
            # Add the current operation to the op_stack:
            op_stack.append(token)

    # Add remaining operations to rpn tokens
    while len(op_stack) != 0:
        rpn_tokens.append(op_stack.pop())

    return rpn_tokens

test = to_rpn(parsed_tokens)

AttributeError: 'function' object has no attribute 'priority'

In [53]:
def to_rpn(tokens):
    rpn_tokens = []
    op_stack = []

    for token in tokens:
        # Add number to rpn tokens
        if (is_float(token)):
            rpn_tokens.append(token)
        # Add opening bracket to operation stack
        elif token == "(":
            op_stack.append(token)
        # Consumes all operations until matching opening bracket
        elif token == ")":
            while op_stack[-1] != "(":
                rpn_tokens.append(op_stack.pop())
            op_stack.pop()
        elif token in list(operations.keys()):
            try:
                # Check if we have operations that have higher priority on
                # the op_stack and add them to rpn_tokens so that they are evaluated first:
                token_priority = operations[token].priority
                while op_stack[-1] != "(" and operations[op_stack[-1]].priority >= token_priority:
                    rpn_tokens.append(op_stack.pop())
            except IndexError:  # op_stack is empty
                pass
            # Add the current operation to the op_stack:
            op_stack.append(token)

    # Add remaining operations to rpn tokens
    while len(op_stack) != 0:
        rpn_tokens.append(op_stack.pop())

    return rpn_tokens

to_rpn(test)

AttributeError: 'MathJSON' object has no attribute 'replace'

In [59]:
def calculate(rpn_tokens):
    val_stack = []

    for token in rpn_tokens:
        if isinstance(token, MathJSON):
            val_stack.append(token)
        elif token in list(operations.keys()):
            args = []
            for x in range(operations[token].lmbd.__code__.co_argcount):
                # If this throws an error user didn't give enough args
                args.append(val_stack.pop())
            result = operations[token].lmbd(*args)
            val_stack.append(result)

    # If the value stack is bigger than one we probably made an error with the input
    assert len(val_stack) == 1
    return val_stack[0]

calculate(test).objectJson

{'Mul': [{'Mul': [{'Div': [{'integer': '1'}, {'integer': '2'}]},
    {'tensor_string_representation': 'G^{a}^{b}'}]},
  {'Sub': [{'Add': [{'Mul': [{'tensor_string_representation': 'D_{c}'},
        {'tensor_string_representation': 'G_{b}_{d}'}]},
      {'Mul': [{'tensor_string_representation': 'D_{d}'},
        {'tensor_string_representation': 'G_{b}_{c}'}]}]},
    {'Mul': [{'tensor_string_representation': 'D_{b}'},
      {'tensor_string_representation': 'G_{c}_{d}'}]}]}]}

In [44]:
def calculate_string(string):
    tokenized = tokenize(string)
    rpn = to_rpn(tokenized)
    return calculate(rpn)

calculate_string('1+1*(2-5)')

-2.0

In [None]:
MathJson = {'Mul': [{'Mul': [{'Div': [{'integer': '1'}, {'integer': '2'}]},
    {'tensor_string_representation': 'G^{a}^{b}'}]},
  {'Sub': [{'Add': [{'Mul': [{'tensor_string_representation': 'D_{c}'},
        {'tensor_string_representation': 'G_{b}_{d}'}]},
      {'Mul': [{'tensor_string_representation': 'D_{d}'},
        {'tensor_string_representation': 'G_{b}_{c}'}]}]},
    {'Mul': [{'tensor_string_representation': 'D_{b}'},
      {'tensor_string_representation': 'G_{c}_{d}'}]}]}]}

In [None]:
{'Mul': [{'Mul': [{'Div': [{'integer': '1'}, {'integer': '2'}]},
    {'tensor_string_representation': 'G^{a}^{b}'}]},
  {'Sub': [{'Add': [{'Mul': [{'tensor_string_representation': 'D_{c}'},
        {'tensor_string_representation': 'G_{b}_{d}'}]},
      {'Mul': [{'tensor_string_representation': 'D_{d}'},
        {'tensor_string_representation': 'G_{b}_{c}'}]}]},
    {'Mul': [{'tensor_string_representation': 'D_{b}'},
      {'tensor_string_representation': 'G_{c}_{d}'}]}]}]}