In [19]:
import fitz  # PyMuPDF
import os
from PIL import Image
from io import BytesIO

# Step 1: Create large PNG images (larger size for heavier PDF)
def generate_large_images(image_count=5, width=3000, height=3000):
    images = []
    for i in range(image_count):
        img = Image.new("RGB", (width, height), (i * 30 % 255, i * 60 % 255, i * 90 % 255))
        img_bytes = BytesIO()
        # Guardar como PNG sin compresión para aumentar tamaño
        img.save(img_bytes, format='PNG', compress_level=0)
        img_bytes.seek(0)
        images.append(img_bytes.getvalue())
    return images

# Step 2: Create the original PDF with PNG images (bigger file)
def create_heavy_pdf(images, output_path="20250623_original_file.pdf"):
    doc = fitz.open()
    for img_bytes in images:
        img = Image.open(BytesIO(img_bytes))
        rect = fitz.Rect(0, 0, img.width, img.height)
        page = doc.new_page(width=img.width, height=img.height)
        page.insert_image(rect, stream=img_bytes)
    doc.save(output_path)
    doc.close()

# Step 3: Compress the PDF with JPEG and resizing
def compress_pdf(input_path, output_path, image_quality=50, image_scale=0.4):
    doc = fitz.open(input_path)
    for page_index in range(len(doc)):
        page = doc[page_index]
        images = page.get_images(full=True)
        for img in images:
            xref = img[0]
            base_image = doc.extract_image(xref)
            image_bytes = base_image["image"]
            image = Image.open(BytesIO(image_bytes))

            # Resize and compress
            width, height = image.size
            new_size = (int(width * image_scale), int(height * image_scale))
            image = image.resize(new_size, Image.Resampling.LANCZOS).convert("RGB")

            compressed_image_bytes = BytesIO()
            image.save(compressed_image_bytes, format="JPEG", quality=image_quality)

            rect = page.rect
            page.clean_contents()
            page.insert_image(rect, stream=compressed_image_bytes.getvalue())

    doc.save(output_path, deflate=True)
    doc.close()

# Run the process
images = generate_large_images()
create_heavy_pdf(images, "20250623_original_file.pdf")

input_pdf = "20250623_original_file.pdf"
output_pdf = "20250623_compressed_file.pdf"

compress_pdf(input_pdf, output_pdf)

# Display results
original_size = os.path.getsize(input_pdf) / (1024 * 1024)
compressed_size = os.path.getsize(output_pdf) / (1024 * 1024)

print(f"Original file size: {original_size:.2f} MB")
print(f"Compressed file size: {compressed_size:.2f} MB")


Original file size: 128.75 MB
Compressed file size: 0.25 MB
