In [2]:
import os
from io import BytesIO

from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import StreamingResponse
from pypdf import PdfReader, PdfWriter

# Application object; the route handlers in this file register on it.
app = FastAPI()

# Directory that requested PDF filenames are joined onto before being opened.
PDF_DIR = "/mnt/f/chatbot_ui_v5/docs"  # base folder

def extract_pdf_pages(pdf_path: str, pages: list[int]) -> PdfWriter:
    """Copy the requested pages of one PDF into a fresh ``PdfWriter``.

    Args:
        pdf_path: Path of the PDF to read.
        pages: 0-based page indices, copied in the order given.

    Returns:
        A writer containing every requested page whose index is valid for
        the document. Indices below zero or at/after the page count are
        skipped without raising.
    """
    source = PdfReader(pdf_path)
    selection = PdfWriter()

    for index in pages:
        # Guard clause: ignore anything outside [0, page count).
        if index < 0 or index >= len(source.pages):
            continue
        selection.add_page(source.pages[index])

    return selection

def merge_pdf_parts(pdf_specs: list[tuple[str, list[int]]]) -> BytesIO:
    """Build one PDF from selected pages of several files under ``PDF_DIR``.

    Args:
        pdf_specs: ``(filename, page_indices)`` pairs, processed in order.
            Each filename is resolved relative to ``PDF_DIR`` and the page
            indices are passed on to ``extract_pdf_pages``.

    Returns:
        An in-memory stream holding the merged PDF, rewound to position 0.

    Raises:
        ValueError: If a filename resolves to a location that is not inside
            ``PDF_DIR`` (absolute path, ``..`` components, a symlink that
            leads out, or an empty name).
    """
    base_dir = os.path.realpath(PDF_DIR)
    # Trailing separator so that "/docs" does not also match "/docs_private".
    base_prefix = os.path.join(base_dir, "")

    # Resolve and check every name before opening anything, so a bad spec
    # fails before any file is read.
    resolved_specs = []
    for pdf_name, pages in pdf_specs:
        # os.path.join discards base_dir when pdf_name is absolute, and ".."
        # components walk upwards; realpath exposes both cases.
        path = os.path.realpath(os.path.join(base_dir, pdf_name))
        if not path.startswith(base_prefix):
            raise ValueError(
                f"PDF name resolves outside the document directory: {pdf_name!r}"
            )
        resolved_specs.append((path, pages))

    final_writer = PdfWriter()
    for path, pages in resolved_specs:
        for page in extract_pdf_pages(path, pages).pages:
            final_writer.add_page(page)

    output_stream = BytesIO()
    final_writer.write(output_stream)
    # Rewind so the caller can read the document from the start.
    output_stream.seek(0)
    return output_stream


In [3]:
@app.get("/custom-pdf")
def get_custom_pdf(
    pdfs: list[str] = Query(..., description="List of PDF filenames"),
    page_ranges: list[str] = Query(..., description="Comma-separated page indices per file (0-based)")
):
    """
    Return a single PDF assembled from selected pages of the named files.

    The n-th ``page_ranges`` entry applies to the n-th ``pdfs`` entry.

    Example:
    /custom-pdf?pdfs=doc1.pdf&pdfs=doc2.pdf&page_ranges=0,1&page_ranges=2
    Will take pages 0,1 from doc1.pdf and page 2 from doc2.pdf

    Errors:
    - 400 if ``pdfs`` and ``page_ranges`` differ in length, or a page range
      contains something that is not an integer.
    - 404 if a filename does not name an existing file inside ``PDF_DIR``.
    """
    # zip() would silently drop the unmatched tail, so reject mismatches.
    if len(pdfs) != len(page_ranges):
        raise HTTPException(
            status_code=400,
            detail=(
                f"Got {len(pdfs)} pdfs but {len(page_ranges)} page_ranges; "
                "provide exactly one page_ranges value per pdf."
            ),
        )

    base_dir = os.path.realpath(PDF_DIR)
    # Trailing separator so that "/docs" does not also match "/docs_private".
    base_prefix = os.path.join(base_dir, "")

    specs = []
    for name, range_str in zip(pdfs, page_ranges):
        # Filenames come straight from the query string: only accept ones
        # that resolve to a real file inside PDF_DIR. Missing files and
        # names pointing elsewhere get the same 404 so the response does not
        # reveal what exists outside the document directory.
        target = os.path.realpath(os.path.join(base_dir, name))
        if not (target.startswith(base_prefix) and os.path.isfile(target)):
            raise HTTPException(status_code=404, detail=f"PDF not found: {name}")

        try:
            page_nums = [int(p.strip()) for p in range_str.split(",")]
        except ValueError as err:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid page range for {name}: {range_str!r}",
            ) from err
        specs.append((name, page_nums))

    output = merge_pdf_parts(specs)
    return StreamingResponse(output, media_type="application/pdf", headers={"Content-Disposition": "inline; filename=custom.pdf"})


In [None]:
# Example requests:
#   GET /custom-pdf?pdfs=2412.17149v1.pdf&page_ranges=0,1,2
#   GET /custom-pdf?pdfs=a.pdf&pdfs=b.pdf&page_ranges=0,1&page_ranges=2,3