In [None]:
import json



In [None]:
# Load parsed.json into `data`. Later cells iterate over `data`, reading one
# sheet record per item (keys such as "rows", "merged_cells" and "drawings").
with open("parsed.json", "r", encoding="utf-8") as f:
    data = json.load(f)


In [None]:
import re
from html import escape

def col_to_idx(col):
    """Convert spreadsheet column letters to a zero-based column index.

    The letters are read as a bijective base-26 number in which ``A`` is 1
    and ``Z`` is 26, then shifted down by one:
    ``"A"`` -> 0, ``"B"`` -> 1, ``"Z"`` -> 25, ``"AA"`` -> 26.
    """
    # One-based value of every letter, in reading order.
    letter_values = [ord(letter) - ord("A") + 1 for letter in col]

    column_number = 0
    for value in letter_values:
        column_number = column_number * 26 + value

    # column_number is one-based; callers want a zero-based index.
    return column_number - 1

def idx_to_col(idx):
    """Convert a zero-based column index to spreadsheet column letters.

    This is the inverse of ``col_to_idx``: 0 -> ``"A"``, 25 -> ``"Z"``,
    26 -> ``"AA"``.

    Args:
        idx: Zero-based column index (non-negative integer).

    Returns:
        The column letters as an uppercase string.

    Raises:
        ValueError: If ``idx`` is negative. Without this guard the loop below
            never reaches zero for values below -1 and would run forever.
    """
    if idx < 0:
        raise ValueError(f"column index must be non-negative, got {idx}")

    letters = []
    number = idx + 1  # switch to the one-based numbering used by the letters
    while number:
        # Subtracting 1 before dividing makes the digits run 1..26 (A..Z)
        # instead of 0..25, which is what bijective base-26 requires.
        number, remainder = divmod(number - 1, 26)
        letters.append(chr(ord("A") + remainder))

    # Digits were produced least-significant first.
    return "".join(reversed(letters))

def split_cell_ref(ref):
    """Split an A1-style cell reference into column letters and row number.

    Example: ``"AB12"`` -> ``("AB", 12)``.

    Only the leading ``<uppercase letters><digits>`` part of ``ref`` is
    examined; anything after it is ignored.

    Args:
        ref: Cell reference string such as ``"C7"``.

    Returns:
        A ``(column_letters, row_number)`` tuple; the row number is an int.

    Raises:
        ValueError: If ``ref`` does not start with uppercase column letters
            followed by a row number.
    """
    m = re.match(r"([A-Z]+)(\d+)", ref)
    if m is None:
        # Fail with a message naming the bad reference instead of an
        # AttributeError on ``None.group``.
        raise ValueError(f"invalid cell reference: {ref!r}")
    column_letters, row_digits = m.groups()
    return column_letters, int(row_digits)

def parse_range(rng):
    """Parse an A1-style range such as ``"B2:D5"`` into numeric bounds.

    A reference without a ``":"`` (a single cell such as ``"B2"``) is treated
    as a range that starts and ends on that cell.

    Args:
        rng: Range string, either ``"<start>:<end>"`` or a single cell.

    Returns:
        ``(start_row, end_row, start_col, end_col)``. Rows are the numbers
        written in the reference (one-based); columns are zero-based indices
        as produced by ``col_to_idx``.

    Raises:
        ValueError: If ``rng`` contains more than one ``":"`` or a corner is
            not a valid cell reference.
    """
    corners = rng.split(":")
    if len(corners) == 1:
        # Single-cell reference: both corners are the same cell.
        corners = corners * 2
    # Unpacking still rejects input with more than one ":".
    start, end = corners

    sc, sr = split_cell_ref(start)
    ec, er = split_cell_ref(end)
    return (
        sr, er,
        col_to_idx(sc), col_to_idx(ec)
    )


In [None]:
def build_grid(rows):
    """Build a dense 2-D grid of cell values from sparse row records.

    Args:
        rows: Sequence of dicts, each with a ``"row"`` entry (one-based row
            number) and a ``"cells"`` mapping of A1-style cell reference to
            value. Only the column part of each reference is used; the row
            comes from ``"row"``.

    Returns:
        A list of ``max(row)`` rows, each a list with one slot per column up
        to the right-most referenced column. Slots with no value, and values
        that are ``None``, hold ``""``. Returns ``[]`` when ``rows`` is empty.
    """
    if not rows:
        return []

    # Resolve every reference to (row index, column index, value) once, and
    # track the right-most column while doing so.
    placements = []
    max_col = 0
    for r in rows:
        r_idx = r["row"] - 1
        for ref, val in r["cells"].items():
            col, _ = split_cell_ref(ref)
            c_idx = col_to_idx(col)
            max_col = max(max_col, c_idx)
            placements.append((r_idx, c_idx, val))

    max_row = max(r["row"] for r in rows)
    grid = [
        ["" for _ in range(max_col + 1)]
        for _ in range(max_row)
    ]

    for r_idx, c_idx, val in placements:
        # Only a missing value becomes ""; falsy-but-real contents such as
        # 0 or False must survive (``val or ""`` would have erased them).
        grid[r_idx][c_idx] = "" if val is None else val

    return grid


In [None]:
def apply_merges(grid, merged_cells):
    """Translate merged ranges into span attributes and cells to omit.

    Args:
        grid: Accepted for call-site symmetry; not read by this function.
        merged_cells: Iterable of range strings understood by ``parse_range``.

    Returns:
        ``(spans, skip)`` where ``spans`` maps the zero-based
        ``(row, col)`` of each range's top-left cell to
        ``{"rowspan": ..., "colspan": ...}`` and ``skip`` is the set of every
        other ``(row, col)`` position covered by a range.
    """
    span_by_anchor = {}
    covered = set()

    for cell_range in merged_cells:
        first_row, last_row, first_col, last_col = parse_range(cell_range)

        # Rows from parse_range are one-based; grid positions are zero-based.
        anchor = (first_row - 1, first_col)
        span_by_anchor[anchor] = {
            "rowspan": last_row - first_row + 1,
            "colspan": last_col - first_col + 1,
        }

        block = {
            (row, col)
            for row in range(first_row - 1, last_row)
            for col in range(first_col, last_col + 1)
        }
        # The anchor itself is rendered; everything else in the block is not.
        block.discard(anchor)
        covered |= block

    return span_by_anchor, covered


In [None]:
def grid_to_html(grid, spans, skip):
    """Render a grid as an HTML ``<table>`` string, honouring merged cells.

    Args:
        grid: List of rows, each a list of cell values. Every value is
            converted with ``str()`` and HTML-escaped.
        spans: Mapping of ``(row, col)`` to a dict with ``"rowspan"`` and
            ``"colspan"``; an attribute is emitted only when its value is
            greater than 1.
        skip: Collection of ``(row, col)`` positions for which no ``<td>`` is
            written.

    Returns:
        The table markup as one string with no whitespace between tags.
    """
    def span_attrs(position):
        # Attribute text for one cell; empty when the cell is not a merge anchor.
        if position not in spans:
            return ""
        extent = spans[position]
        row_part = f' rowspan="{extent["rowspan"]}"' if extent["rowspan"] > 1 else ""
        col_part = f' colspan="{extent["colspan"]}"' if extent["colspan"] > 1 else ""
        return row_part + col_part

    pieces = ["<table border='1'>"]
    for row_no, cells in enumerate(grid):
        pieces.append("<tr>")
        pieces.extend(
            f"<td{span_attrs((row_no, col_no))}>{escape(str(value))}</td>"
            for col_no, value in enumerate(cells)
            if (row_no, col_no) not in skip
        )
        pieces.append("</tr>")
    pieces.append("</table>")

    return "".join(pieces)


In [None]:
# Build one dense grid per sheet record in `data`.
# `rows` collects each sheet's raw row records; `grids` collects the dense
# grids built from them, in the same order.
rows = []
grids = []
for sheet in data:
    rows.append(sheet.get("rows",[]))
    grid = build_grid(sheet.get("rows", []))
    # Merged ranges become span attributes plus a set of covered cells to omit.
    spans, skip = apply_merges(grid, sheet.get("merged_cells", []))
    # NOTE(review): `html` is reassigned on every iteration and never stored
    # in this cell, so only the last sheet's markup survives the loop —
    # confirm whether it should be collected alongside `grids`.
    html = grid_to_html(grid, spans, skip)
        
    grids.append(grid)
    # Debug aid (disabled): print(grid)

In [None]:
# Load tables.json into `tables`; its structure is not established by the
# cells visible here.
with open("tables.json", "r", encoding="utf-8") as f:
    tables = json.load(f)


In [None]:
def merge_strings(str_list, separator=""):
    """Join the items of a list into one string.

    Every item is converted with ``str()`` before joining, so non-string
    items (including ``None``) are kept, not dropped.

    Args:
        str_list: The items to join; must be a ``list``.
        separator: Text placed between consecutive items (default: none).

    Returns:
        The joined string; ``""`` for an empty list.

    Raises:
        TypeError: If ``str_list`` is not a ``list``.
    """
    if isinstance(str_list, list):
        pieces = list(map(str, str_list))
        return separator.join(pieces)

    raise TypeError("Input must be a list.")

In [None]:
# Print every drawing that has text, preceded by the `xlsx_name` and
# `sheet_name` of the record it came from.
for drawing in data:
    if not drawing['drawings']:
        continue  # this record has no drawings
    for i in drawing['drawings']:
        if not i['text']:
            continue  # this drawing has no text
        # Disabled: i['text'] = merge_strings(i['text'])
        print(drawing['xlsx_name'])
        print(drawing['sheet_name'])
        print(i)

In [None]:
# Show the object loaded from tables.json as this cell's output.
tables

In [None]:
# Number of top-level records loaded from parsed.json.
len(data)

In [None]:
for sheet in data: