# Installations

In [1]:
pip install pymupdf openai

Collecting pymupdf
  Downloading pymupdf-1.25.3-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Downloading pymupdf-1.25.3-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.0/20.0 MB[0m [31m49.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pymupdf
Successfully installed pymupdf-1.25.3


# Your arguments here

In [None]:
import os

# Vision-capable chat model used to transcribe each page image.
LLM_NAME = "gpt-4o"
# PDF to convert and the (1-based, inclusive) page range to process.
PDF_PATH = "1704.00805v4.pdf"
START_PAGE = 2
END_PAGE = 9
# Prefer the OPENAI_API_KEY environment variable so a real key never has to be
# saved inside the notebook; the placeholder is only used when it is not set.
openai_key = os.environ.get("OPENAI_API_KEY", "your openai key here")

# Functions

In [14]:
import fitz  # PyMuPDF
import os
import base64
from openai import OpenAI


def pdf_to_images(pdf_path, output_folder, start_page, end_page, zoom=4):
    """
    Convert specified PDF pages to high-quality PNG images.

    Each converted page is written to ``output_folder`` as ``page_<n>.png``,
    where ``<n>`` is the 1-based page number.

    :param pdf_path: Path to the PDF file
    :param output_folder: Output directory for images (created if missing)
    :param start_page: First page to convert (1-based)
    :param end_page: Last page to convert (1-based, inclusive); values beyond
        the document length are clamped to the last page
    :param zoom: Zoom factor for image quality (default=4)
    :raises ValueError: If ``start_page`` is below 1 or greater than ``end_page``
    """
    # Validate the page range before touching the filesystem or the PDF.
    # A start page below 1 would turn into a negative 0-based index below.
    if start_page < 1:
        raise ValueError("Start page must be at least 1")
    if start_page > end_page:
        raise ValueError("Start page cannot be greater than end page")

    # Create output folder if needed
    os.makedirs(output_folder, exist_ok=True)

    # The context manager closes the document even if rendering or saving fails.
    with fitz.open(pdf_path) as pdf:
        # Clamp the end page to the document length
        if end_page > len(pdf):
            end_page = len(pdf)
            print(f"Adjusted end page to PDF length: {end_page}")

        # The scaling matrix is identical for every page, so build it once.
        mat = fitz.Matrix(zoom, zoom)

        for page_num in range(start_page - 1, end_page):  # 0-based index
            page = pdf.load_page(page_num)
            pix = page.get_pixmap(matrix=mat)
            output_path = os.path.join(output_folder, f"page_{page_num+1}.png")
            pix.save(output_path)
            print(f"Saved page {page_num+1} as {output_path}")

def image_to_latex(image_path, prompt, model=LLM_NAME):
    """
    Send an image plus a text instruction to an OpenAI chat model and return
    the text of its reply.

    :param image_path: Path to the image file
    :param prompt: Instruction for the model
    :param model: model to use
    :return: Content of the first choice's message (the generated LaTeX code)
    """
    # Read the raw bytes and embed them in a base64 PNG data URL
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    base64_image = base64.b64encode(raw_bytes).decode("utf-8")
    data_url = f"data:image/png;base64,{base64_image}"

    # A single user message carrying both the instruction and the image
    text_part = {"type": "text", "text": prompt}
    image_part = {"type": "image_url", "image_url": {"url": data_url}}
    user_message = {"role": "user", "content": [text_part, image_part]}

    # The API key comes from the module-level `openai_key` setting
    client = OpenAI(api_key=openai_key)

    # max_tokens is intentionally left unset (previously tried: 1500)
    response = client.chat.completions.create(
        model=model,
        messages=[user_message],
    )

    first_choice = response.choices[0]
    return first_choice.message.content

def postprocess_text_output(full_latex_text):
    r"""
    Merge the concatenated per-page LaTeX outputs into one document.

    The preamble (everything before the first ``\begin{document}``) of the
    first page is kept once; for every page only the text after its
    ``\begin{document}`` is kept, with ``\end{document}`` removed. The bodies
    are joined with blank lines and closed by a single ``\end{document}``.

    :param full_latex_text: Per-page model outputs concatenated into one string
    :return: A single LaTeX document as a string
    """
    import re  # local import: only this function needs it

    begin_tag = "\\begin{document}"
    end_tag = "\\end{document}"

    # Strip markdown code fences first so they cannot leak into the preamble.
    cleaned = full_latex_text.replace("```latex", "").replace("```", "")

    preamble = cleaned.split(begin_tag)[0] + "\n" + begin_tag

    # Split at every \documentclass declaration, whatever its options or class
    # (e.g. \documentclass[11pt]{article}), so each page forms its own chunk.
    # Splitting on the literal "\documentclass{article}" only would leave other
    # declarations in one chunk, and every page but the last would be dropped.
    chunks = re.split(r"\\documentclass(?:\[[^\]]*\])?\{[^}]*\}", cleaned)

    # Ignore (near-)empty chunks, then keep only each page's document body.
    bodies = [
        chunk.split(begin_tag)[-1].replace(end_tag, "")
        for chunk in chunks
        if len(chunk.strip()) > 1
    ]

    return preamble + "\n\n".join(bodies) + "\n" + end_tag

def main():
    """
    Run the whole pipeline: render PDF pages to images, convert each image to
    LaTeX with the OpenAI model, save one .tex file per page and return the
    merged document.

    The PDF path and page range are read from the notebook-level settings
    PDF_PATH, START_PAGE and END_PAGE, so editing the arguments cell is enough
    to change what gets processed.

    :return: Merged LaTeX document produced by postprocess_text_output
    """

    # ===== CONFIGURATION =====
    # PDF_PATH, START_PAGE and END_PAGE are taken from the arguments cell and
    # are deliberately not redefined here (a local copy would shadow them).
    IMAGE_OUTPUT_DIR = "pdf_images"
    LATEX_OUTPUT_DIR = "latex_output"
    ZOOM_FACTOR = 4  # Increase for higher resolution (max 10 recommended)
    # =========================

    # Create output directories
    os.makedirs(IMAGE_OUTPUT_DIR, exist_ok=True)
    os.makedirs(LATEX_OUTPUT_DIR, exist_ok=True)

    # Step 1: Convert PDF pages to images
    print("\nConverting PDF pages to images...")
    pdf_to_images(
        pdf_path=PDF_PATH,
        output_folder=IMAGE_OUTPUT_DIR,
        start_page=START_PAGE,
        end_page=END_PAGE,
        zoom=ZOOM_FACTOR
    )

    # Step 2: Process images with OpenAI
    print("\nConverting images to LaTeX...")
    PROMPT = """Convert this image to LaTeX code. Follow these rules:
1. Use proper LaTeX syntax and environments
2. Preserve mathematical notation exactly
3. NO markdown formatting
4. Ensure accurate alignment and structure
5. The latex code should work as it is
6. Don't include any graphics because I don't have them
7. For the citations don't use \\cite, just write them in regular text format as they appear in the image

Output ONLY the LaTeX code:"""
    page_outputs = []
    for page_num in range(START_PAGE, END_PAGE + 1):
        image_path = os.path.join(IMAGE_OUTPUT_DIR, f"page_{page_num}.png")
        # Pages beyond the end of the PDF have no image and are skipped
        if not os.path.exists(image_path):
            print(f"Skipping page {page_num} - image not found")
            continue

        print(f"Processing page {page_num}...")
        try:
            latex_code = image_to_latex(image_path, PROMPT)
            output_path = os.path.join(LATEX_OUTPUT_DIR, f"page_{page_num}.tex")
            # Concatenating inside the try keeps a non-string reply from the
            # model a per-page error instead of a crash after the loop.
            page_outputs.append(latex_code + "\n")

            # Explicit UTF-8 so non-ASCII characters in the LaTeX do not depend
            # on the platform's default encoding.
            with open(output_path, "w", encoding="utf-8") as f:
                f.write(latex_code)

            print(f"Successfully saved LaTeX for page {page_num}\n")
        except Exception as e:
            # Best effort: report the failure and continue with the next page
            print(f"Error processing page {page_num}: {e}")
    return postprocess_text_output("".join(page_outputs))

# Executions

In [None]:
# Run the whole pipeline (the OpenAI model is called once per page image that
# exists) and keep the merged LaTeX document for inspection or later cells.
full_latex_text = main()
# Show the merged LaTeX source so it can be copied out of the notebook.
print(full_latex_text)