In [None]:
import pandas as pd
from sympy import sympify, simplify
from sympy.printing.mathml import mathml
import re

In [None]:
def remove_outer_mrow(expression):
    """
    Removes the outer <mrow> tag from the math ML.

    The wrapper is stripped only when the leading ``<mrow>`` and the trailing
    ``</mrow>`` are the opening and closing tag of the *same* element, i.e. a
    single ``<mrow>`` encloses the whole expression. Input such as
    ``<mrow>a</mrow><mrow>b</mrow>`` (two sibling rows) is returned unchanged,
    because removing the first and last tag there would leave broken markup.

    Args:
        expression: MathML markup as a string; it may span several lines.

    Returns:
        The content of the outer ``<mrow>`` element, or ``expression`` itself
        when it is not wrapped in one balanced outer ``<mrow>``.
    """
    # DOTALL lets the wrapped content contain newlines (pretty-printed MathML).
    outer = re.match(r'^<mrow>(.*)</mrow>$', expression, re.DOTALL)
    if outer is None:
        return expression   # Return original expression if no match

    inner = outer.group(1)

    # Walk the <mrow> tags inside the candidate content. If a closing tag
    # appears while no inner row is open, it closes the leading <mrow>, which
    # means the leading and trailing tags are not a matching pair.
    depth = 0
    for tag in re.finditer(r'<(/?)mrow(?:\s[^>]*)?>', inner):
        if tag.group(0).endswith('/>'):
            continue    # self-closing <mrow /> neither opens nor closes a row
        depth += -1 if tag.group(1) else 1
        if depth < 0:
            return expression

    # Leftover open rows mean the markup is unbalanced; leave it untouched.
    if depth != 0:
        return expression

    return inner

In [None]:
def build_mathml(expr_str):
    """
    Render an expression string as presentation MathML.

    The string is parsed with ``sympify`` (``evaluate=False``), printed with
    sympy's presentation MathML printer, and the result is passed through
    ``remove_outer_mrow`` before being returned.
    """
    parsed = sympify(expr_str, evaluate=False)
    return remove_outer_mrow(mathml(parsed, printer='presentation'))

In [None]:
def expression_generator():
    """
    Yield the placeholder names "expa", "expb", ..., "expz" in order.

    One name is produced per lowercase ASCII letter; the generator is
    exhausted after "expz".
    """
    for code_point in range(ord('a'), ord('z') + 1):
        yield "exp" + chr(code_point)

In [None]:
def build_expression_map(vars):
    """
    Assign a placeholder name from ``expression_generator`` to each variable.

    Each distinct variable receives exactly one placeholder, taken in order of
    first appearance. Repeated occurrences of a variable reuse the placeholder
    it already has instead of consuming a new one, so the limited supply of
    names is only spent on distinct variables.

    Args:
        vars: Iterable of hashable variable names; duplicates are allowed.

    Returns:
        dict mapping each distinct variable to its placeholder name.

    Raises:
        ValueError: If there are more distinct variables than placeholder
            names produced by ``expression_generator``.
    """
    placeholders = expression_generator()
    assigned = {}
    for var in vars:
        if var in assigned:
            continue    # already has a placeholder; do not burn another one
        try:
            assigned[var] = next(placeholders)
        except StopIteration:
            # A bare StopIteration escaping from here is easy to misread as
            # normal iterator exhaustion by callers; report it explicitly.
            raise ValueError(
                "more distinct variables than available placeholder names"
            ) from None

    return assigned

In [None]:
def swap_string(input_string, mapping):
    """
    Replace the words in a string based on the provided dictionary map.

    All keys are replaced in a single left-to-right pass, so:

    * when one key is a prefix of another (e.g. ``RFCD1234`` and
      ``RFCD12345``) the longest key wins at each position, and
    * text inserted by one replacement is never rewritten by another key.

    Keys are matched literally (regex metacharacters have no special meaning).
    Empty-string keys are ignored, since they would match at every position.

    Args:
        input_string: The text to rewrite.
        mapping: dict of ``{text_to_find: replacement_text}``.

    Returns:
        A new string with every occurrence of each key replaced by its value;
        ``input_string`` unchanged when ``mapping`` has no usable keys.
    """
    # Longest keys first so the alternation prefers the most specific match.
    keys = sorted((key for key in mapping if key), key=len, reverse=True)
    if not keys:
        return input_string

    combined = re.compile("|".join(re.escape(key) for key in keys))
    # A function replacement avoids backslash/group-reference processing of
    # the replacement text, so values are inserted verbatim.
    return combined.sub(lambda found: mapping[found.group(0)], input_string)

In [None]:
def swap_mapping(mapping: dict):
    """
    Invert a dictionary and tag the former keys with "$identifier".

    Every ``key: value`` pair becomes ``value: key + "$identifier"``; for
    example ``{"foo": "bar"}`` becomes ``{"bar": "foo$identifier"}``.
    The "$identifier" suffix marks the restored names for later string
    replacement.
    """
    return {
        placeholder: original + "$identifier"
        for original, placeholder in mapping.items()
    }


In [None]:
# Sample formulas that reference RFCD-prefixed codes.
df_mdrm = pd.DataFrame(
    {
        "mdrm": [
            "RFCD1234 + RFCD1235",
            "(RFCD1234 + RFCD1235 + RFCD1236)/100",
            "(RFCD1234 + RFCD1235)/RFCD1236",
        ]
    }
)

# Matches "RFCD" followed by one or more digits.
pattern = r'(RFCD\d+)'

# 1. Collect every code that appears in each formula.
df_mdrm["extracted"] = df_mdrm["mdrm"].apply(lambda formula: re.findall(pattern, formula))

# 2. Give each formula its own code -> placeholder mapping.
df_mdrm["expression_map"] = df_mdrm["extracted"].apply(build_expression_map)

# 3. Rewrite the formula in terms of the placeholders.
df_mdrm["math_expression"] = [
    swap_string(formula, code_to_name)
    for formula, code_to_name in zip(df_mdrm["mdrm"], df_mdrm["expression_map"])
]

# 4. Convert the placeholder formula to MathML.
df_mdrm["math_ml"] = df_mdrm["math_expression"].apply(build_mathml)

# 5. Build the reverse mapping (placeholder -> code + "$identifier").
df_mdrm["var_to_mdrm_map"] = df_mdrm["expression_map"].apply(swap_mapping)

# 6. Put the original codes back into the generated MathML.
df_mdrm["formatted_math_ml"] = [
    swap_string(markup, name_to_code)
    for markup, name_to_code in zip(df_mdrm["math_ml"], df_mdrm["var_to_mdrm_map"])
]
