In [16]:
import openpyxl
import formulas
from formulas.tokens import Token
import re
from formulas.tokens.operand import Range

# Workbook whose formula cells are grouped by shape further down in this cell.
path = "./formula.xlsx"
# NOTE(review): the scan below only picks up cells whose value is a string
# starting with "=", so it relies on load_workbook returning formulas as text
# (i.e. data_only left at its default) — confirm.
wb = openpyxl.load_workbook(path)
ws = wb.active  # only the active worksheet is scanned

# One entry per group; each group is a list of token lists that does_match()
# treats as the same formula shape. The first token list acts as the sample.
match_groups = []


def split_alpha_num(s):
    """Split the leading letters of ``s`` from the digits that directly follow.

    Returns a ``(letters, digits)`` pair of strings. ``digits`` is ``None``
    when no digit follows the letters. When ``s`` does not start with an
    ASCII letter the result is ``(None, None)``. Anything after the digits
    is ignored.
    """
    found = re.match(r"([A-Za-z]+)(\d*)", s)  # number part optional
    if found is None:
        return None, None

    letters, digits = found.groups()
    # An empty digit group is reported as None rather than "".
    return letters, (digits if digits else None)


def get_alpha_num(token: Range):
    """Return the ``(letters, digits)`` split of ``token.name``, memoised.

    The split is computed once with ``split_alpha_num`` and stored in
    ``token.attr`` under the ``"__alpha_num"`` key; later calls return the
    stored pair.
    """
    cache = token.attr
    if "__alpha_num" not in cache:
        cache["__alpha_num"] = split_alpha_num(token.name)
    return cache["__alpha_num"]


def does_match(t1: list[Token], t2: list[Token]):
    """Return True when two token sequences have the same formula shape.

    The sequences match when they have the same length and, position by
    position:

    * both tokens are instances of exactly the same class;
    * ``Range`` tokens share the same alphabetic part (per ``get_alpha_num``);
      their digits are deliberately ignored so ``A2`` and ``A3`` match;
    * every other token (operators, functions, literals, ...) has the same
      ``name``, so ``A1+B1`` and ``A1*B1`` are not treated as the same shape.

    Args:
        t1: tokens of the first formula.
        t2: tokens of the second formula.

    Returns:
        bool: whether the two formulas belong to the same group.
    """
    if len(t1) != len(t2):
        return False

    for first, second in zip(t1, t2):
        if type(first) is not type(second):
            return False

        if isinstance(first, Range):
            first_word, _ = get_alpha_num(first)
            second_word, _ = get_alpha_num(second)
            if first_word != second_word:
                return False
        elif first.name != second.name:
            # Same class is not enough for non-range tokens: the group is
            # later rendered with the sample's token names, so they must agree.
            return False

    return True


# Walk every cell of the sheet and bucket formula cells by shape: a formula
# joins the first existing group whose sample (first member) it matches,
# otherwise it starts a new group.
for row in ws.iter_rows(values_only=True):
    for cell in row:
        if not (isinstance(cell, str) and cell.startswith("=")):
            continue

        new_tokens, _ = formulas.Parser().ast(cell)

        # Pre-compute the letters/digits split so it is cached on each
        # Range token before the grouping and the later rendering step.
        for token in new_tokens:
            if isinstance(token, Range):
                get_alpha_num(token)

        for group in match_groups:
            if does_match(new_tokens, group[0]):
                group.append(new_tokens)
                break
        else:
            # No existing group accepted the formula.
            match_groups.append([new_tokens])

match_groups

[[[A2 <Range>, ^ <Operator>, POWER <Range>],
  [A3 <Range>, ^ <Operator>, POWER <Range>],
  [A4 <Range>, ^ <Operator>, POWER <Range>],
  [A5 <Range>, ^ <Operator>, POWER <Range>],
  [A6 <Range>, ^ <Operator>, POWER <Range>],
  [A7 <Range>, ^ <Operator>, POWER <Range>],
  [A8 <Range>, ^ <Operator>, POWER <Range>],
  [A9 <Range>, ^ <Operator>, POWER <Range>],
  [A10 <Range>, ^ <Operator>, POWER <Range>],
  [A11 <Range>, ^ <Operator>, POWER <Range>],
  [A12 <Range>, ^ <Operator>, POWER <Range>],
  [A13 <Range>, ^ <Operator>, POWER <Range>],
  [A14 <Range>, ^ <Operator>, POWER <Range>]],
 [[A2 <Range>, ^ <Operator>, K1 <Range>],
  [A2 <Range>, ^ <Operator>, K1 <Range>],
  [A3 <Range>, ^ <Operator>, K2 <Range>],
  [A4 <Range>, ^ <Operator>, K3 <Range>],
  [A4 <Range>, ^ <Operator>, K1 <Range>],
  [A5 <Range>, ^ <Operator>, K4 <Range>],
  [A5 <Range>, ^ <Operator>, K1 <Range>],
  [A6 <Range>, ^ <Operator>, K5 <Range>],
  [A6 <Range>, ^ <Operator>, K1 <Range>],
  [A7 <Range>, ^ <Operator>, K6

In [17]:
def find_ranges(nums):
    """Collapse consecutive integers into ``(start, end)`` runs.

    Args:
        nums: iterable of integers, expected in ascending order. Values that
            are not exactly ``previous + 1`` start a new run.

    Returns:
        list: one entry per run, in input order. A run of several values is
        an inclusive ``(start, end)`` tuple; a run of one value is the bare
        integer. Empty input yields an empty list.
    """
    values = list(nums)
    if not values:
        # Nothing to collapse; avoids indexing into an empty sequence.
        return []

    def _as_entry(start, end):
        # Single-value runs are reported as the value itself, not a tuple.
        return (start, end) if start != end else start

    ranges = []
    start = prev = values[0]
    for n in values[1:]:
        if n != prev + 1:
            ranges.append(_as_entry(start, prev))
            start = n
        prev = n

    ranges.append(_as_entry(start, prev))
    return ranges


def get_range_info(group):
    """Collect, per token position, the digit parts seen across a group.

    Args:
        group: list of formulas, each a list of tokens.

    Returns:
        dict: maps a token index to the set of digit strings found at that
        index among the ``Range`` tokens of the group. Positions whose
        ``Range`` tokens carry no digits, and positions holding other token
        kinds, are absent.
    """
    range_dict = {}
    for formula in group:
        for idx, token in enumerate(formula):
            if not isinstance(token, Range):
                continue
            # Go through the caching helper instead of reading the cache key
            # directly, so tokens that were never pre-processed still work.
            _, digit = get_alpha_num(token)
            if digit:
                range_dict.setdefault(idx, set()).add(digit)

    return range_dict


def convert_ranges_to_string(data):
    """Render the output of ``find_ranges`` as a compact label.

    A list holding exactly one integer is rendered as that number alone.
    Otherwise every entry is rendered (``start-end`` for tuples, the plain
    value for anything else), joined with commas and wrapped in ``<...>``.
    """
    if len(data) == 1 and isinstance(data[0], int):
        return str(data[0])

    rendered = [
        f"{item[0]}-{item[1]}" if isinstance(item, tuple) else str(item)
        for item in data
    ]
    return "<" + ",".join(rendered) + ">"


# Render one pattern string per group: Range positions that carry digits are
# shown as "<letters><digit label>", everything else uses the sample's name.
result = []

for group in match_groups:
    range_dict = {
        idx: convert_ranges_to_string(find_ranges(sorted(int(d) for d in digits)))
        for idx, digits in get_range_info(group).items()
    }

    sample = group[0]
    pieces = []
    for idx, token in enumerate(sample):
        if idx in range_dict:
            # Letters come from the sample; digits from the whole group.
            pieces.append(token.attr["__alpha_num"][0] + range_dict[idx])
        else:
            pieces.append(token.name)

    final_formula = "".join(pieces)
    result.append(final_formula)

result

['A<2-14>^POWER', 'A<2-16,20>^K<1-13,99>', 'B3^L2', 'C3^M2', 'D3^N2', 'E3^O2']