In [None]:
import html
import json
import os
from pathlib import Path

import ipywidgets as widgets
from IPython.display import display
from lxml import etree
from PIL import Image

In [None]:
# Root of the tnh-scholar checkout. Set the TNH_SCHOLAR_ROOT environment variable to run this
# notebook from another machine or location; the original hard-coded path stays the default.
project_root = Path(os.environ.get("TNH_SCHOLAR_ROOT", "/Users/phapman/Desktop/tnh-scholar"))
# Folder with the processed data for the journal issue under review.
working_dir = project_root / "data_processing" / "processed_journal_data/phat-giao-viet-nam-1956-25-26"
# Folder that extract_pages searches for page_<number>.jpg images.
image_dir_path = working_dir / "html_view/images"

In [None]:
# OCR XML document for this issue, stored directly under working_dir (adjust as needed).
xml_file_path = working_dir.joinpath("full_OCR_text_phat-giao-viet-nam-1956-25-26.xml")

# Load XML file
def load_xml(file_path):
    """Parse the XML document at ``file_path`` and return the parsed tree.

    The file is opened in binary mode so the parser receives the raw bytes and
    can apply the encoding named in the document's own XML declaration, instead
    of having a UTF-8 text decode forced on the content before parsing.

    Args:
        file_path: Location of the XML file (anything ``open`` accepts).

    Returns:
        The object returned by ``etree.parse`` for that file.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(file_path, "rb") as file:
        tree = etree.parse(file)
    return tree

# Save XML file
def save_xml(tree, file_path):
    """Serialize ``tree`` to ``file_path``, replacing any existing file.

    The output is pretty-printed, UTF-8 encoded and starts with an XML
    declaration.

    Args:
        tree: Object exposing ``write(file, **options)`` (the parsed XML tree).
        file_path: Destination path; it is opened for binary writing.
    """
    write_options = {
        "pretty_print": True,
        "encoding": "utf-8",
        "xml_declaration": True,
    }
    with open(file_path, "wb") as sink:
        tree.write(sink, **write_options)

# Extract OCR text and image paths
def extract_pages(tree, image_dir):
    """Collect the OCR text and image path of every page that has an image on disk.

    Each ``<page>`` element found by the ``//page`` XPath query is matched to
    ``page_<number>.jpg`` inside ``image_dir``, where ``<number>`` is the
    element's ``page`` attribute. Pages whose image file does not exist are
    left out of the result. A progress line is printed for every page visited.

    Args:
        tree: Parsed XML tree exposing ``xpath``.
        image_dir: Directory containing the page images.

    Returns:
        A list of dicts with the keys ``"number"``, ``"text"`` (the element's
        leading text with surrounding whitespace removed, or ``""``) and
        ``"image"`` (path to the image file).
    """
    collected = []
    for node in tree.xpath("//page"):
        number = node.get("page")
        print(f"processing page {number}")
        raw_text = node.text
        cleaned = raw_text.strip() if raw_text else ""
        # Assuming images are named by page number
        candidate = os.path.join(image_dir, f"page_{number}.jpg")
        if not os.path.exists(candidate):
            continue
        collected.append({"number": number, "text": cleaned, "image": candidate})
    return collected

# # Display text and image side by side with editing
# def edit_page(page, tree):
#     page_text_area = widgets.Textarea(
#         value=page["text"],
#         description=f"Page {page['number']}",
#         layout=widgets.Layout(width="100%", height="300px")
#     )

#     # Display image
#     img = Image.open(page["image"])
#     display(img)

#     # Display editable text area
#     display(page_text_area)

#     # Save button
#     save_button = widgets.Button(description="Save Changes")
#     output = widgets.Output()

#     def save_changes(_):
#         # Update XML with new text
#         page_element = tree.xpath(f"//page[@page='{page['number']}']")[0]
#         page_element.text = page_text_area.value
#         save_xml(tree, xml_file_path)
#         with output:
#             print(f"Page {page['number']} updated and saved!")

#     save_button.on_click(save_changes)
#     display(save_button, output)



In [None]:
from IPython.display import display, HTML, Javascript

def edit_page_with_save(page, tree):
    """
    Displays OCR text and corresponding image side by side with save functionality.

    The page image and an editable textarea are rendered as raw HTML. Clicking
    "Save Changes" asks the notebook kernel to run
    ``update_text_in_python(<page number>, <edited text>)``, so that function
    must exist in the kernel namespace when the button is used.

    Every value taken from ``page`` is escaped for the context it lands in:
    HTML-escaped inside markup, and emitted as a JSON string literal inside the
    script. The edited text is sent with ``JSON.stringify``, which yields a
    double-quoted literal that Python can parse. The success message is shown
    only after the kernel reports that the call finished without an error.

    Args:
        page: Mapping with the keys ``"number"``, ``"text"`` and ``"image"``.
        tree: Unused here; kept so existing callers keep working.

    Note:
        The script relies on the ``Jupyter.notebook.kernel`` browser global.
        When that object is missing, the button reports that saving is
        unavailable instead of failing silently.
    """
    page_number = str(page["number"])
    textarea_id = f"text-area-{page_number}"
    button_id = f"save-button-{page_number}"

    def as_js_string(value):
        # A JSON string literal is also a valid JavaScript string literal. "</" is
        # broken up so the value can never close the surrounding <script> element.
        return json.dumps(value).replace("</", "<\\/")

    # The closing </textarea> follows the text directly: anything placed between
    # them would become part of the edited value and be written back on save.
    html_content = f"""
    <div style="display: flex; align-items: flex-start; gap: 20px;">
        <div>
            <img src="{html.escape(str(page['image']))}" alt="Page {html.escape(page_number)} Image" style="max-width: 600px; border: 1px solid #ccc;">
        </div>
        <div style="flex-grow: 1;">
            <textarea id="{html.escape(textarea_id)}" 
                      style="width: 100%; height: 300px; border: 1px solid #ccc; padding: 5px; font-family: monospace;">
{html.escape(str(page['text']))}</textarea>
            <button id="{html.escape(button_id)}" 
                    style="margin-top: 10px; padding: 5px 10px; font-size: 14px;">Save Changes</button>
        </div>
    </div>
    <script>
        document.getElementById({as_js_string(button_id)}).onclick = function() {{
            const pageNumber = {as_js_string(page_number)};
            const text = document.getElementById({as_js_string(textarea_id)}).value;
            if (typeof Jupyter === "undefined" || !Jupyter.notebook || !Jupyter.notebook.kernel) {{
                alert("Cannot save page " + pageNumber + ": no notebook kernel is reachable from this page.");
                return;
            }}
            const command = "update_text_in_python(" + JSON.stringify(pageNumber) + ", " + JSON.stringify(text) + ")";
            Jupyter.notebook.kernel.execute(command, {{
                shell: {{
                    reply: function(msg) {{
                        if (msg.content.status === "ok") {{
                            alert("Saved changes for page " + pageNumber + "!");
                        }} else {{
                            alert("Saving page " + pageNumber + " failed: " + msg.content.ename + ": " + msg.content.evalue);
                        }}
                    }}
                }}
            }});
        }};
    </script>
    """
    display(HTML(html_content))

In [None]:
def update_text_in_python(page_number, new_text):
    """Replace the text of one ``<page>`` element and write the XML back to disk.

    Works on the notebook-level ``tree`` and saves it to ``xml_file_path``
    through ``save_xml``.

    Args:
        page_number: Value of the target element's ``page`` attribute. It is
            converted to ``str`` and compared as a string.
        new_text: Text to store on the element.

    Raises:
        IndexError: If no ``<page>`` element carries that ``page`` attribute.
            Nothing is saved in that case.
    """
    # Update XML tree with new text. The page number is passed as an XPath
    # variable rather than spliced into the expression, so quotes in the value
    # cannot break or alter the query.
    matches = tree.xpath("//page[@page = $page_number]", page_number=str(page_number))
    if not matches:
        raise IndexError(f"No <page> element with page={page_number!r} in the loaded XML")
    page_element = matches[0]
    page_element.text = new_text
    save_xml(tree, xml_file_path)
    print(f"Saved changes for page {page_number}")

In [None]:
# Show the kernel's current working directory (relative paths resolve against it).
str(Path.cwd())

In [None]:
# Sanity check that a sample page image is present. The path is built from
# image_dir_path, the same folder extract_pages reads, so the result does not
# depend on the kernel's current working directory.
(image_dir_path / "page_2.jpg").exists()

In [None]:
# Preview a sample page image. It is located through image_dir_path rather than
# a path relative to the current working directory, and the with-block closes
# the underlying file once the image has been displayed.
with Image.open(image_dir_path / "page_2.jpg") as img:
    display(img)


In [None]:
# Main: parse the OCR XML, then gather every page that has a matching image on disk.
tree = load_xml(file_path=xml_file_path)
pages = extract_pages(tree=tree, image_dir=image_dir_path)

In [None]:
# Preview only the first few extracted pages: each entry carries a page's full
# OCR text, so echoing the whole list floods the cell output.
pages[:3]

In [None]:
import streamlit as st
from PIL import Image

# Simulate OCR processing
def load_ocr_results(image_path):
    """Return fixed placeholder OCR text; ``image_path`` is accepted but never read."""
    # Dummy OCR text for demonstration
    placeholder_text = "This is the OCR result for the image."
    return placeholder_text

# Sidebar for selecting an image
st.sidebar.title("OCR Editor")
# Only raster formats are offered: the upload is passed straight to PIL's
# Image.open below, which cannot read PDF files.
uploaded_file = st.sidebar.file_uploader("Upload an Image", type=["jpg", "jpeg", "png"])

if uploaded_file:
    # Display the image
    # NOTE(review): newer Streamlit releases appear to deprecate use_column_width in
    # favour of use_container_width — confirm against the installed version.
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # Display OCR text in an editable field
    ocr_text = load_ocr_results(uploaded_file)
    edited_text = st.text_area("Edit OCR Text", ocr_text, height=300)

    # Save button
    if st.button("Save Changes"):
        # Replace with your saving logic
        # For example, save to file. The encoding is explicit so non-ASCII OCR text
        # is written the same way on every platform.
        with open("ocr_output.txt", "w", encoding="utf-8") as f:
            f.write(edited_text)
        # Report success only once the file has actually been written.
        st.write("Saved changes!")

else:
    st.write("Upload an image to begin.")