In [None]:
# %pip install pdf2image

In [None]:
import os
import glob
from pdf2image import convert_from_path

def convert_pdfs_to_images(input_path, output_path, dpi=300, batch_size=10):
    """Render every PDF found directly inside ``input_path`` to JPEG pages.

    For each ``<name>.pdf`` a sub-folder ``<output_path>/<name>`` is created
    and each page is written there as ``Page_<n>.jpeg``, where ``<n>`` is the
    1-based page number zero-padded to the width of the document's page count.
    Progress is reported with ``print``.

    Args:
        input_path: Directory that is searched (non-recursively) for PDFs.
            The extension is matched case-insensitively.
        output_path: Directory that receives one sub-folder per PDF. It is
            created if it does not exist.
        dpi: Rendering resolution passed to ``convert_from_path``.
        batch_size: Maximum number of pages rendered and held in memory at
            once.

    Raises:
        ValueError: If ``input_path`` does not exist or ``batch_size`` is
            smaller than 1.
    """
    if not os.path.exists(input_path):
        raise ValueError("Input path does not exist")
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_path, exist_ok=True)

    # Escape the directory so characters such as '[' or '*' in its name are
    # taken literally, then filter by a lower-cased suffix so '.PDF' files are
    # found on case-sensitive file systems too. Sorting makes the processing
    # order deterministic.
    candidates = glob.glob(os.path.join(glob.escape(input_path), '*'))
    pdf_files = sorted(
        path for path in candidates
        if path.lower().endswith('.pdf') and os.path.isfile(path)
    )
    if not pdf_files:
        return

    # Imported here because the file-level import only brings in
    # convert_from_path.
    from pdf2image import pdfinfo_from_path

    for pdf_file in pdf_files:
        # One output folder per PDF, named after the file without extension.
        base_name = os.path.splitext(os.path.basename(pdf_file))[0]
        pdf_output_path = os.path.join(output_path, base_name)
        os.makedirs(pdf_output_path, exist_ok=True)

        print(f"Converting {pdf_file} to images...")

        # The page count is needed up front for the zero-padding width, and
        # it lets the document be rendered in bounded batches instead of
        # holding every page image in memory at the same time.
        total_pages = int(pdfinfo_from_path(pdf_file)["Pages"])
        digits = len(str(total_pages))

        for first_page in range(1, total_pages + 1, batch_size):
            last_page = min(first_page + batch_size - 1, total_pages)
            images = convert_from_path(
                pdf_file,
                dpi=dpi,
                fmt='jpeg',
                first_page=first_page,
                last_page=last_page,
            )

            # Save each page of this batch as a JPEG image.
            for page_number, image in enumerate(images, start=first_page):
                image_path = os.path.join(
                    pdf_output_path,
                    f"Page_{str(page_number).zfill(digits)}.jpeg",
                )
                image.save(image_path, "JPEG")
                print(f"Page {page_number} saved as image: {image_path}")

# Example run:
# PDFs are read from ./Input; each one gets its own folder of page images
# under ./ToImages.
input_directory = './Input'
output_directory = './ToImages'
convert_pdfs_to_images(
    input_path=input_directory,
    output_path=output_directory,
)


Converting ./Input\entra-identity-multi-tenant-organizations.pdf to images...
Page 1 saved as image: ./ToImages\entra-identity-multi-tenant-organizations\Page_001.jpeg
Page 2 saved as image: ./ToImages\entra-identity-multi-tenant-organizations\Page_002.jpeg
Page 3 saved as image: ./ToImages\entra-identity-multi-tenant-organizations\Page_003.jpeg
Page 4 saved as image: ./ToImages\entra-identity-multi-tenant-organizations\Page_004.jpeg
Page 5 saved as image: ./ToImages\entra-identity-multi-tenant-organizations\Page_005.jpeg
Page 6 saved as image: ./ToImages\entra-identity-multi-tenant-organizations\Page_006.jpeg
Page 7 saved as image: ./ToImages\entra-identity-multi-tenant-organizations\Page_007.jpeg
Page 8 saved as image: ./ToImages\entra-identity-multi-tenant-organizations\Page_008.jpeg
Page 9 saved as image: ./ToImages\entra-identity-multi-tenant-organizations\Page_009.jpeg
Page 10 saved as image: ./ToImages\entra-identity-multi-tenant-organizations\Page_010.jpeg
Page 11 saved as imag