## Marker-API

This notebook shows how to call the Marker API from Python and save each response as a Markdown file, together with any images returned for the document.

In [14]:
# Uncomment to install the dependencies into the kernel's environment:
# %pip install requests pillow

In [12]:
import os
import requests
from PIL import Image
import base64
import argparse

def _safe_path_component(name):
    """Reduce a server-supplied name to a single, safe path component.

    Any directory part (with either ``/`` or ``\\`` separators) is dropped so
    the name cannot point outside the folder it is joined onto.

    Raises:
        ValueError: if nothing usable remains (empty, ``.`` or ``..``).
    """
    leaf = os.path.basename(str(name).replace('\\', '/'))
    if leaf in ('', '.', '..'):
        raise ValueError(f"Unsafe file name in server response: {name!r}")
    return leaf


def save_images_and_markdown(response_data, output_folder):
    """Write the markdown and images of each converted PDF to disk.

    For every entry in ``response_data`` a sub-folder named after the PDF
    (file name without its extension) is created inside ``output_folder``.
    The markdown is written there as ``output.md`` (UTF-8) and every image is
    base64-decoded and written under its own name.

    Args:
        response_data: iterable of dicts with the keys ``'filename'`` and
            ``'markdown'`` and, optionally, ``'images'`` (a mapping of image
            name to base64-encoded image bytes). A missing or null
            ``'images'`` entry is treated as "no images".
        output_folder: directory that receives one sub-folder per PDF; it is
            created if it does not exist.

    Raises:
        KeyError: if an entry lacks ``'filename'`` or ``'markdown'``.
        ValueError: if a file or image name has no usable final component.
    """
    os.makedirs(output_folder, exist_ok=True)

    for pdf in response_data:
        # The names come from the server response, so keep only their final
        # component: a name such as "../x.pdf" must not escape output_folder.
        pdf_stem = os.path.splitext(_safe_path_component(pdf['filename']))[0]
        pdf_output_folder = os.path.join(output_folder, pdf_stem)
        os.makedirs(pdf_output_folder, exist_ok=True)

        # Save markdown
        with open(os.path.join(pdf_output_folder, 'output.md'), 'w', encoding='utf-8') as f:
            f.write(pdf['markdown'])

        # Save images
        for image_name, image_base64 in (pdf.get('images') or {}).items():
            image_path = os.path.join(pdf_output_folder, _safe_path_component(image_name))
            with open(image_path, 'wb') as f:
                f.write(base64.b64decode(image_base64))

def convert_pdf_to_markdown_and_save(pdf_file_paths, output_folder, server_url, timeout=None):
    """Upload PDFs to the conversion server and save the returned results.

    Every file in ``pdf_file_paths`` is read and attached to one multipart
    POST request under the form field ``pdf_files``. On an HTTP 200 response
    the JSON body is passed to ``save_images_and_markdown``; on any other
    status the response text is printed.

    Args:
        pdf_file_paths: iterable of paths to the PDF files to convert.
        output_folder: directory in which the results are saved.
        server_url: URL of the conversion endpoint.
        timeout: optional timeout forwarded to ``requests.post`` (seconds, or
            a ``(connect, read)`` tuple). The default ``None`` waits
            indefinitely, which matches the previous behaviour; pass a value
            to avoid hanging on an unresponsive server.

    Returns:
        None. The outcome is reported with ``print``.
    """
    # Prepare the files for the request; each file is read fully into memory
    # and closed again before the request is sent.
    files = []
    for pdf_file_path in pdf_file_paths:
        with open(pdf_file_path, 'rb') as f:
            pdf_content = f.read()
        files.append(('pdf_files', (os.path.basename(pdf_file_path), pdf_content, 'application/pdf')))

    # Without any attachment there is nothing to convert, so skip the request.
    if not files:
        print("Error: no PDF files were provided.")
        return

    # Send request to FastAPI server with all PDF files attached
    response = requests.post(server_url, files=files, timeout=timeout)

    # Anything other than 200 is reported as an error.
    if response.status_code != 200:
        print(f"Error: {response.text}")
        return

    # Save markdown and images
    save_images_and_markdown(response.json(), output_folder)
    print("Markdown and images saved successfully.")
        

#### Convert a single PDF file and save it as Markdown

In [11]:

# Where the conversion server listens and where the results are written.
server_url = "http://127.0.0.1:8000/convert"
output_folder = "output_folder"

# A single document is still passed as a list.
pdf_file_paths = ["test.pdf"]

# Upload the PDF, then store the returned markdown and images.
convert_pdf_to_markdown_and_save(
    pdf_file_paths=pdf_file_paths,
    output_folder=output_folder,
    server_url=server_url,
)

Markdown and images saved successfully.


#### Convert multiple PDF files and save them as Markdown

In [13]:
# Where the conversion server listens and where the results are written.
server_url = "http://127.0.0.1:8000/convert"
output_folder = "output_folder"

# All listed documents are sent together in one request.
pdf_file_paths = [
    "test.pdf",
    "test2.pdf",
]

# Upload the PDFs, then store the returned markdown and images.
convert_pdf_to_markdown_and_save(
    pdf_file_paths=pdf_file_paths,
    output_folder=output_folder,
    server_url=server_url,
)

Markdown and images saved successfully.
