In [7]:
%pip install easyocr pandas pillow pix2tex


Defaulting to user installation because normal site-packages is not writeable
Collecting easyocr
  Downloading easyocr-1.7.2-py3-none-any.whl.metadata (10 kB)
Collecting scikit-image (from easyocr)
  Downloading scikit_image-0.26.0-cp311-cp311-win_amd64.whl.metadata (15 kB)
Collecting python-bidi (from easyocr)
  Downloading python_bidi-0.6.7-cp311-cp311-win_amd64.whl.metadata (5.0 kB)
Collecting Shapely (from easyocr)
  Downloading shapely-2.1.2-cp311-cp311-win_amd64.whl.metadata (7.1 kB)
Collecting pyclipper (from easyocr)
  Downloading pyclipper-1.4.0-cp311-cp311-win_amd64.whl.metadata (8.8 kB)
Collecting ninja (from easyocr)
  Using cached ninja-1.13.0-py3-none-win_amd64.whl.metadata (5.1 kB)
Collecting imageio!=2.35.0,>=2.33 (from scikit-image->easyocr)
  Downloading imageio-2.37.2-py3-none-any.whl.metadata (9.7 kB)
Collecting tifffile>=2022.8.12 (from scikit-image->easyocr)
  Downloading tifffile-2026.1.28-py3-none-any.whl.metadata (30 kB)
Collecting lazy-loader>=0.4 (from scikit


[notice] A new release of pip is available: 25.3 -> 26.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [8]:
from io import BytesIO
import os
import re
import pandas as pd
from PIL import Image
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML

import easyocr
from pix2tex.cli import LatexOCR

# -------------------- Load models once --------------------
# The first EasyOCR run downloads its models, so expect a delay the first time.
ocr_reader = easyocr.Reader(
    ["en"],
    gpu=False,  # set gpu=True if you have a CUDA GPU
)
latex_model = LatexOCR()

# -------------------- UI Widgets --------------------
# Header: tool title plus a one-line description of the workflow.
title = widgets.HTML(
    "<h2 style='margin:0'>📥 Question Image → CSV (with LaTeX)</h2>"
    "<div style='color:#888;margin-top:4px'>Upload image → extract question &amp; choices → append to CSV.</div>"
)

# Single-image upload control.
upload = widgets.FileUpload(accept="image/*", multiple=False, description="Upload image")

# Destination CSV; the value is read when "Append to CSV" is clicked.
csv_path = widgets.Text(
    value="questions.csv",
    description="CSV path:",
    style={"description_width": "80px"},
    layout=widgets.Layout(width="600px")
)

append_btn = widgets.Button(description="Append to CSV", button_style="success", icon="plus",
                            layout=widgets.Layout(width="160px", height="40px"))

clear_btn = widgets.Button(description="Clear", button_style="warning", icon="trash",
                           layout=widgets.Layout(width="120px", height="40px"))

# Status line; the click handlers overwrite its HTML value.
status = widgets.HTML("<span style='color:#888'>Status: waiting…</span>")

# Output panes for the uploaded image preview and the extracted text.
preview_out = widgets.Output(layout=widgets.Layout(border="1px solid #333", padding="10px", width="100%"))
result_out = widgets.Output(layout=widgets.Layout(border="1px solid #333", padding="10px", width="100%"))

# -------------------- Helpers --------------------
def load_uploaded_pil_image():
    """
    Read the file currently held by the `upload` widget as a PIL image.

    Expects the FileUpload value layout seen in VS Code ipywidgets:
      - upload.value is a tuple of dict(s)
      - each dict has a "content" entry that is bytes-like (often memoryview)

    Returns:
        (image, name): the image converted to RGB and the uploaded file name
        ("uploaded_image" when the entry has no "name" key), or (None, None)
        when nothing has been uploaded.
    """
    uploads = upload.value
    if not uploads:
        return None, None

    # Only the first uploaded entry is used.
    entry = uploads[0]
    filename = entry.get("name", "uploaded_image")
    raw_bytes = entry["content"]

    rgb_image = Image.open(BytesIO(raw_bytes)).convert("RGB")
    return rgb_image, filename

def ocr_text_lines(pil_img):
    """
    Run the module-level EasyOCR reader on a PIL image.

    Returns:
        list of stripped text fragments, in the order the reader produced
        them (roughly reading order); blank fragments are dropped.
    """
    import numpy as np

    pixels = np.array(pil_img)
    # detail=0 yields plain strings rather than (box, text, confidence) entries.
    fragments = ocr_reader.readtext(pixels, detail=0, paragraph=False)

    cleaned = []
    for fragment in fragments:
        # Skip anything that is empty once stringified and stripped.
        if str(fragment).strip():
            cleaned.append(fragment.strip())
    return cleaned

def split_question_and_choices(lines):
    """
    Best-effort splitter for a multiple-choice question.

    Recognizes option markers written as:
      A) ... B) ... C) ... D) ...   (case-insensitive; ")", "." or ":" after the letter)
      A. ... etc
      1) ... 2) ... 3) ... 4) ...   (returned in the A-D slots, in order)

    A marker is ignored when it directly follows a word character or a ".",
    so the tail of an abbreviation such as "U.S.A." stays in the text.
    For numeric markers, "." and ":" must also not be followed by a digit,
    so decimals ("2.5") and times ("1:30") are not mistaken for options.

    Letter markers take precedence over numeric ones. A marker family is only
    used when its first marker ("A" or "1") occurs somewhere in the text. If a
    marker occurs more than once, the last occurrence wins.

    Args:
        lines: list of OCR text lines in reading order.

    Returns:
        (question_text, A, B, C, D) as stripped strings. When no markers are
        found, question_text is all lines joined by spaces and the four
        options are empty strings.
    """
    text = "\n".join(lines)

    # Each pattern has exactly one capture group (the marker key), so re.split
    # yields [before, key, after, key, after, ...].
    letter_marker = r'(?i)(?<![\w.])([ABCD])[\).\:]\s*'
    digit_marker = r'(?<![\w.])([1-4])(?:\)|[.:](?!\d))\s*'

    for marker_re, keys in ((letter_marker, "ABCD"), (digit_marker, "1234")):
        parts = re.split(marker_re, text)
        found = {key.upper() for key in parts[1::2]}
        if keys[0] not in found:
            # Without the first marker this family is not considered present.
            continue

        opts = dict.fromkeys(keys, "")
        for key, val in zip(parts[1::2], parts[2::2]):
            opts[key.upper()] = val.strip()
        return (parts[0].strip(), *(opts[k] for k in keys))

    # Fallback: no markers found, so the whole text is treated as the question.
    # (You can improve this later based on your screenshots format.)
    question = " ".join(lines).strip()
    return question, "", "", "", ""

def pix2tex_latex(pil_img):
    """
    Ask the module-level pix2tex model for a LaTeX rendering of the image.

    This is a best-effort guess: on a full multiple-choice screenshot the
    result may be imperfect, but it is still kept as a 'math latex guess'.

    Returns:
        The model output, or a string starting with "LATEX_ERROR: " followed
        by the repr of the exception if the model call raised.
    """
    try:
        latex_guess = latex_model(pil_img)
    except Exception as err:
        # Deliberately non-fatal: the caller stores this marker in place of LaTeX.
        return f"LATEX_ERROR: {repr(err)}"
    return latex_guess

def append_row_to_csv(csv_file, row_dict):
    """
    Write `row_dict` as a single CSV row to `csv_file`.

    The parent directory is created if missing. When the file does not exist
    or is empty, it is (re)written with a header row; otherwise the row is
    appended without a header.

    Args:
        csv_file: path of the CSV file.
        row_dict: mapping of column name -> value for the new row.
    """
    parent_dir = os.path.dirname(csv_file) or "."
    os.makedirs(parent_dir, exist_ok=True)

    file_has_content = os.path.exists(csv_file) and os.path.getsize(csv_file) > 0
    write_mode = "a" if file_has_content else "w"

    pd.DataFrame([row_dict]).to_csv(
        csv_file,
        mode=write_mode,
        header=not file_has_content,  # header only for a new/empty file
        index=False,
        encoding="utf-8",
    )

# -------------------- Actions --------------------
def on_append(_):
    """
    Click handler for the "Append to CSV" button.

    Steps: clear both output panes, load the uploaded image, show it in the
    preview pane, check the CSV path, run OCR and option splitting, get a
    best-effort LaTeX string, append one row to the CSV, then print the
    extracted fields into `result_out`.

    The CSV path is checked before the slow OCR / LaTeX calls. Any exception
    raised while processing is reported in the status line (with the traceback
    printed into `result_out`) so the status never stays on an in-progress
    message after a failure.

    Args:
        _: the clicked button (unused).
    """
    import html       # stdlib: escape user-controlled text placed into status HTML
    import traceback  # stdlib: show the full error in the result pane

    with result_out:
        clear_output()
    with preview_out:
        clear_output()

    try:
        img, name = load_uploaded_pil_image()
        if img is None:
            status.value = "<span style='color:#d29922'>Status: upload an image first.</span>"
            return

        with preview_out:
            display(img)

        # Validate the destination before spending time on OCR / LaTeX.
        csv_file = csv_path.value.strip()
        if not csv_file:
            status.value = "<span style='color:#f85149'>Status: CSV path is empty.</span>"
            return

        status.value = "<span style='color:#58a6ff'>Status: extracting text…</span>"
        lines = ocr_text_lines(img)
        question_text, A, B, C, D = split_question_and_choices(lines)

        status.value = "<span style='color:#58a6ff'>Status: generating LaTeX (best-effort)…</span>"
        question_latex = pix2tex_latex(img)

        row = {
            "question_text": question_text,
            "question_latex": question_latex,
            "A": A,
            "B": B,
            "C": C,
            "D": D,
            "source_image": name
        }

        append_row_to_csv(csv_file, row)
    except Exception as exc:
        # Surface the failure in the UI rather than leaving a stale status.
        status.value = (
            "<span style='color:#f85149'>Status: failed: "
            f"{html.escape(repr(exc))}</span>"
        )
        with result_out:
            traceback.print_exc()
        return

    # csv_file is user-typed text, so escape it before embedding it in HTML.
    status.value = f"<span style='color:#3fb950'>Status: appended ✅ → {html.escape(csv_file)}</span>"

    with result_out:
        print("Extracted question_text:\n", question_text, "\n")
        print("Choices:")
        print("A:", A)
        print("B:", B)
        print("C:", C)
        print("D:", D)
        print("\nLaTeX (best-effort):\n", question_latex)

def on_clear(_):
    """
    Click handler for the "Clear" button.

    Empties the upload widget's value, resets the status line to its waiting
    message, and clears both output panes.

    Args:
        _: the clicked button (unused).
    """
    upload.value = ()
    status.value = "<span style='color:#888'>Status: waiting…</span>"
    with preview_out:
        clear_output()
    with result_out:
        clear_output()

# Connect each button to its click handler.
append_btn.on_click(on_append)
clear_btn.on_click(on_clear)

# -------------------- Layout --------------------
# Action buttons side by side.
buttons = widgets.HBox(
    children=[append_btn, clear_btn],
    layout=widgets.Layout(gap="10px"),
)

# Full tool, stacked top to bottom: header, inputs, actions, status, panes.
ui = widgets.VBox(
    children=[
        title,
        widgets.HTML("<hr style='border:0;border-top:1px solid #333;margin:10px 0'/>"),
        widgets.HBox(
            children=[upload, csv_path],
            layout=widgets.Layout(gap="12px"),
        ),
        buttons,
        status,
        widgets.HTML("<b>Image preview</b>"),
        preview_out,
        widgets.HTML("<b>Extraction result</b>"),
        result_out,
    ]
)

display(ui)


Progress: |â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 100.0% Complete

VBox(children=(HTML(value="<h2 style='margin:0'>ðŸ“¥ Question Image â†’ CSV (with LaTeX)</h2><div style='color:#888â€¦