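Install the required library `pdf2image`, which is used to convert PDF files to images. It lets the notebook process PDF documents and convert each page into a separate image file. The install command in the cell below is commented out; uncomment it if `pdf2image` is not already installed in your environment.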

In [None]:
# %pip install pdf2image

Define the function `convert_pdfs_to_images`, which takes an input directory containing PDF files and an output directory for the converted images. The function finds every `*.pdf` file directly inside the input directory (subdirectories are not searched), renders each page as a JPEG image at a resolution of 300 dpi, and saves the pages in a subdirectory of the output directory named after the PDF, as `Page_<n>.jpeg` with zero-padded page numbers. The usage example at the end of the cell calls the function with the sample directories `./Input` and `./ToImages`.

In [None]:
import os
import glob
from pdf2image import convert_from_path

def convert_pdfs_to_images(input_path, output_path):
    """Convert every PDF in a directory into per-page JPEG images.

    Each ``*.pdf`` file found directly inside ``input_path`` (subdirectories
    are not searched) is rendered at 300 dpi. The pages are written to
    ``<output_path>/<pdf base name>/Page_<n>.jpeg``, where ``<n>`` is the
    1-based page number zero-padded to the width of the document's page
    count, so the files sort in page order.

    Args:
        input_path: Directory containing the PDF files to convert.
        output_path: Directory that receives one subdirectory per PDF. It is
            created (including parents) if it does not exist.

    Raises:
        ValueError: If ``input_path`` does not exist or is not a directory.
    """
    if not os.path.exists(input_path):
        raise ValueError("Input path does not exist")
    # A path to a regular file would otherwise match no PDFs and the call
    # would silently do nothing.
    if not os.path.isdir(input_path):
        raise ValueError("Input path is not a directory")

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_path, exist_ok=True)

    # Escape the directory part so characters such as '[' or '*' in the
    # folder name are matched literally instead of being treated as glob
    # wildcards. Sorting makes the processing order deterministic.
    pattern = os.path.join(glob.escape(os.fspath(input_path)), '*.pdf')
    pdf_files = sorted(glob.glob(pattern))

    for pdf_file in pdf_files:
        # One output folder per PDF, named after the file without extension.
        base_name = os.path.splitext(os.path.basename(pdf_file))[0]
        pdf_output_path = os.path.join(output_path, base_name)
        os.makedirs(pdf_output_path, exist_ok=True)

        print(f"Converting {pdf_file} to images...")
        # NOTE(review): this holds every rendered page of the PDF in memory
        # at once; very large documents may need to be converted in page
        # ranges instead.
        images = convert_from_path(pdf_file, dpi=300, fmt='jpeg')

        # Width of the zero padding, e.g. 3 digits for a 120-page document.
        digits = len(str(len(images)))

        for page_number, image in enumerate(images, start=1):
            image_path = os.path.join(
                pdf_output_path, f"Page_{page_number:0{digits}d}.jpeg"
            )
            image.save(image_path, "JPEG")
            print(f"Page {page_number} saved as image: {image_path}")

# Example run: convert the PDFs found in ./Input and write the resulting
# page images under ./ToImages (one subfolder per PDF).
input_directory, output_directory = './Input', './ToImages'
convert_pdfs_to_images(
    input_path=input_directory,
    output_path=output_directory,
)
