# In [3]:
import os
import re

from PIL import Image

def _natural_sort_key(file_name):
    """Return a sort key that orders digit runs in *file_name* numerically.

    With this key ``page_2.png`` sorts before ``page_10.png``, which plain
    string comparison gets wrong.
    """
    # re.split with a capturing group yields alternating text / digit tokens,
    # always starting with a (possibly empty) text token. Converting by index
    # parity keeps every position type-consistent, so keys always compare.
    tokens = re.split(r'(\d+)', file_name.lower())
    return [int(token) if index % 2 else token for index, token in enumerate(tokens)]


def _pad_to_page_ratio(img, page_ratio):
    """Return a new white RGB canvas of *page_ratio* with *img* centered on it.

    The image is never scaled or cropped: the canvas keeps the image's width
    (when the image is wider than the page ratio) or its height (otherwise),
    and the other dimension is enlarged to reach the requested ratio.
    """
    width, height = img.size
    if width / height > page_ratio:
        # Image is wider than the page: grow the canvas vertically.
        canvas_size = (width, int(width / page_ratio))
    else:
        # Image is taller than (or equal to) the page: grow horizontally.
        canvas_size = (int(height * page_ratio), height)

    canvas = Image.new('RGB', canvas_size, 'white')
    # Only one of the two offsets is non-zero, since one dimension is unchanged.
    offset = ((canvas_size[0] - width) // 2, (canvas_size[1] - height) // 2)
    canvas.paste(img, offset)
    return canvas


def convert_images_to_pdf(input_folder, output_folder, *, page_ratio=8.5 / 11, resolution=100.0):
    """Bundle the images in *input_folder* into one PDF per file-name prefix.

    Files ending in ``.jpg`` or ``.png`` (case-insensitive) are grouped by the
    part of their name that precedes the first underscore; a name without an
    underscore uses its whole stem as the prefix. Each image is centered on a
    white page of the requested aspect ratio and every group is written to
    ``<output_folder>/<prefix>.pdf``, with pages in natural file-name order.

    Args:
        input_folder: Directory containing the source images.
        output_folder: Directory receiving the PDFs; created if missing.
        page_ratio: Page width divided by page height (default: 8.5 x 11).
        resolution: Resolution value passed to Pillow when saving the PDF.

    Raises:
        ValueError: If *page_ratio* is not positive.
    """
    if page_ratio <= 0:
        raise ValueError(f'page_ratio must be positive, got {page_ratio!r}')

    # Saving fails if the destination directory is absent, so create it first.
    os.makedirs(output_folder, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sort so that the pages of
    # each PDF follow the numbering embedded in the file names.
    grouped_images = {}
    for file_name in sorted(os.listdir(input_folder), key=_natural_sort_key):
        if not file_name.lower().endswith(('.jpg', '.png')):
            continue
        if not os.path.isfile(os.path.join(input_folder, file_name)):
            continue  # a directory that merely looks like an image name
        # partition() tolerates names without an underscore, where unpacking
        # the result of split('_', 1) would raise ValueError.
        stem = os.path.splitext(file_name)[0]
        prefix = stem.partition('_')[0]
        grouped_images.setdefault(prefix, []).append(file_name)

    for prefix, image_list in grouped_images.items():
        pdf_images = []
        for image_name in image_list:
            input_path = os.path.join(input_folder, image_name)
            # The padded page is an independent copy, so the source file can
            # be closed as soon as it has been pasted.
            with Image.open(input_path) as img:
                pdf_images.append(_pad_to_page_ratio(img, page_ratio))

        # Save all pages of this group as a single PDF file.
        output_path = os.path.join(output_folder, f'{prefix}.pdf')
        pdf_images[0].save(
            output_path,
            save_all=True,
            append_images=pdf_images[1:],
            resolution=resolution,
        )

# Directory holding the annotated page images to convert.
input_folder = r'C:\Users\Shadow\Downloads\qasper_DiT_DLA_Results_11-13-2023\s3_outputs\page_images_annotated'
# Directory that receives the generated PDF files.
output_folder = r'C:\Users\Shadow\Downloads\qasper_DiT_DLA_Results_11-13-2023\s3_outputs\annotated_pdfs'

# Run the conversion over the folders configured above.
convert_images_to_pdf(input_folder=input_folder, output_folder=output_folder)
