# Formula Exploration Notebook

This notebook holds the attempt to explore how we could operate formulas in spreadsheet cell values.

In [7]:
# Expanding cell ranges

from openpyxl.worksheet.datavalidation import expand_cell_ranges

sample_cell_range = "B2:B5"
sample_expanded_cells = expand_cell_ranges(sample_cell_range)
print(f"Expanded cell list from range of \"{sample_cell_range}\": {sample_expanded_cells}")

Expanded cell list from range of "B2:B5": {'B4', 'B5', 'B3', 'B2'}


In [10]:
# Parse the formula

sample_expanded_formula = "+".join(sample_expanded_cells)
print(sample_expanded_formula)

# We can use the sample_expanded_formula for comparison!

B4+B5+B3+B2


In [16]:
# Now, what about parsing / tokenizing the formulas?

from openpyxl.worksheet.datavalidation import expand_cell_ranges
from openpyxl.formula import Tokenizer
from openpyxl.formula.tokenizer import Token

def simplify_sum(cell_range):
    """
    This method will simplify the range in a SUM function so it could be parsed by Sympy.
    """
    expanded_cells = expand_cell_ranges(cell_range)
    return "+".join(expanded_cells)

def simplify_formula(formula):
    """
    Returns simplified version of excel formula, if any.
    """
    # TODO: This only handles SUM for now.
    formula_tokenizer = Tokenizer(formula)
    pending_formulas = []
    item_buffer = []
    
    # print(f"Tokenized form of formula {formula}:")
    for token in formula_tokenizer.items:
        # print(token)
        if token.type == Token.FUNC and token.subtype == Token.OPEN:
            pending_formulas.append(token)
        elif token.type == Token.FUNC and token.subtype == Token.CLOSE:
            current_formula = pending_formulas.pop()
            # TODO: Update this assumption later.
            return simplify_sum(item_buffer[0].value)
        elif token.type != Token.SEP:
            item_buffer.append(token)
    
    # TODO: Map this out with proper case later.
    return formula.replace("=","")

In [17]:
# Test the method above

from sympy.parsing.sympy_parser import parse_expr
from sympy import simplify

formula_1 = "=SUM(B2:B5)"
formula_2 = "=B2+B3+B4+B5"

formula_1_simplified = simplify_formula(formula_1)
formula_2_simplified = simplify_formula(formula_2)

print(f"Simplified version of formula_1: {formula_1_simplified}")
print(f"Simplified version of formula_2: {formula_2_simplified}")

# Compare them using Sympy

formula_1_parsed = parse_expr(formula_1_simplified)
formula_2_parsed = parse_expr(formula_2_simplified)

print(f"Parsed formula_1's type: {type(formula_1_parsed)}, content: {formula_1_parsed}")
print(f"Parsed formula_2's type: {type(formula_2_parsed)}, content: {formula_2_parsed}")

is_equal_formula = simplify(formula_1_parsed - formula_2_parsed) == 0

print(f"Are the formulas equal: {is_equal_formula}")

Tokenized form of formula =SUM(B2:B5):
FUNC OPEN SUM(:
OPERAND RANGE B2:B5:
FUNC CLOSE ):
Tokenized form of formula =B2+B3+B4+B5:
OPERAND RANGE B2:
OPERATOR-INFIX  +:
OPERAND RANGE B3:
OPERATOR-INFIX  +:
OPERAND RANGE B4:
OPERATOR-INFIX  +:
OPERAND RANGE B5:
Simplified version of formula_1: B3+B2+B4+B5
Simplified version of formula_2: B2+B3+B4+B5
Parsed formula_1's type: <class 'sympy.core.add.Add'>, content: B2 + B3 + B4 + B5
Parsed formula_2's type: <class 'sympy.core.add.Add'>, content: B2 + B3 + B4 + B5
Are the formulas equal: True
