# Formula Exploration Notebook

This notebook holds the attempt to explore how we could operate formulas in spreadsheet cell values.

In [7]:
# Expanding cell ranges

from openpyxl.worksheet.datavalidation import expand_cell_ranges

sample_cell_range = "B2:B5"
sample_expanded_cells = expand_cell_ranges(sample_cell_range)
print(f"Expanded cell list from range of \"{sample_cell_range}\": {sample_expanded_cells}")

Expanded cell list from range of "B2:B5": {'B4', 'B5', 'B3', 'B2'}


In [10]:
# Parse the formula

sample_expanded_formula = "+".join(sample_expanded_cells)
print(sample_expanded_formula)

# We can use the sample_expanded_formula for comparison!

B4+B5+B3+B2


In [29]:
# Now, what about parsing / tokenizing the formulas?

from openpyxl.worksheet.datavalidation import expand_cell_ranges
from openpyxl.formula import Tokenizer
from openpyxl.formula.tokenizer import Token

def simplify_sum_range(cell_range):
    """
    This method will simplify the range in a SUM function so it could be parsed by Sympy.
    """
    expanded_cells = expand_cell_ranges(cell_range)
    return "+".join(expanded_cells)

def simplify_sum_range_list(cell_range_list):
    """
    This method will simplify the passed list of cell ranges in a SUM function so it could be parsed by Sympy.
    """
    
    print(f"Input: {cell_range_list}")
    if len(cell_range_list) == 0:
        return ""
    
    simplified_formula = simplify_sum_range(cell_range_list[0])
    print(f"Current simplified formula: {simplified_formula}")
    
    for cell_range in cell_range_list[1:]:
        current_formula = simplify_sum_range(cell_range)
        simplified_formula += f"+{current_formula}"
        
    print(f"Final simplified formula: {simplified_formula}")
    return simplified_formula

def simplify_formula(formula):
    """
    Returns simplified version of excel formula, if any.
    """
    # TODO: This only handles SUM for now.
    formula_tokenizer = Tokenizer(formula)
    pending_function = []
    item_buffer = []
    
    # print(f"Tokenized form of formula {formula}:")
    for token in formula_tokenizer.items:
        # print(token)
        if token.type == Token.FUNC and token.subtype == Token.OPEN:
            pending_function.append(token)
            
        elif token.type == Token.FUNC and token.subtype == Token.CLOSE:
            current_function = pending_function.pop()
            ranges = [item.value for item in item_buffer]
            return simplify_function_and_data(current_function.value, ranges)
        
        elif token.type != Token.SEP:
            item_buffer.append(token)
    
    # TODO: Map this out with proper case later.
    return formula.replace("=","")

def simplify_function_and_data(function, data):
    """
    Simplifies the passed Excel function string with the passed data list.
    """
    
    print(f"Requested function to be simplified: {function}")
    print(f"Requested data to be simplified: {data}")
    
    function = function.replace("(", "")
    # TODO: Add other functions here besides the SUM.
    if function == "SUM":
        return simplify_sum_range_list(data)
    
    # TODO: Revisit what to do if there's no matching function
    return data
    
    

In [30]:
# Test the method above

from sympy.parsing.sympy_parser import parse_expr
from sympy import simplify

formula_1 = "=SUM(B2:B5)"
formula_2 = "=B2+B3+B4+B5"

formula_1_simplified = simplify_formula(formula_1)
formula_2_simplified = simplify_formula(formula_2)

print(f"Simplified version of formula_1: {formula_1_simplified}")
print(f"Simplified version of formula_2: {formula_2_simplified}")

# Compare them using Sympy

formula_1_parsed = parse_expr(formula_1_simplified)
formula_2_parsed = parse_expr(formula_2_simplified)

print(f"Parsed formula_1's type: {type(formula_1_parsed)}, content: {formula_1_parsed}")
print(f"Parsed formula_2's type: {type(formula_2_parsed)}, content: {formula_2_parsed}")

is_equal_formula = simplify(formula_1_parsed - formula_2_parsed) == 0

print(f"Are the formulas equal: {is_equal_formula}")

Requested function to be simplified: SUM(
Requested data to be simplified: ['B2:B5']
Input: ['B2:B5']
Current simplified formula: B3+B2+B4+B5
Final simplified formula: B3+B2+B4+B5
Simplified version of formula_1: B3+B2+B4+B5
Simplified version of formula_2: B2+B3+B4+B5
Parsed formula_1's type: <class 'sympy.core.add.Add'>, content: B2 + B3 + B4 + B5
Parsed formula_2's type: <class 'sympy.core.add.Add'>, content: B2 + B3 + B4 + B5
Are the formulas equal: True
