In [3]:
import os
import re
from PIL import Image
from fpdf import FPDF

In [None]:
# def aggregate_images_to_pdf(frames_folder="Frames", output_folder="PDFs"):
#     """
#     Aggregates images from each video folder within the 'Frames' folder
#     into a single PDF named after the video in the 'PDFs' folder.
#     Skips conversion if the output PDF already exists.

#     Args:
#         frames_folder (str): The name of the main folder containing video folders.
#                              Defaults to "Frames".
#         output_folder (str): The name of the folder to save the generated PDFs.
#                                Defaults to "PDFs".
#     """
#     if not os.path.exists(frames_folder):
#         print(f"Error: Folder '{frames_folder}' not found.")
#         return

#     # Create the output folder if it doesn't exist
#     os.makedirs(output_folder, exist_ok=True)

#     video_folders = [
#         f for f in os.listdir(frames_folder)
#         if os.path.isdir(os.path.join(frames_folder, f))
#     ]

#     for video_name in video_folders:
#         video_path = os.path.join(frames_folder, video_name)
#         pdf_filename = f"{video_name}.pdf"
#         pdf_output_path = os.path.join(output_folder, pdf_filename)

#         # Skip conversion if the output PDF already exists
#         if os.path.exists(pdf_output_path):
#             print(f"PDF already exists for video: {video_name}. Skipping.")
#             continue

#         image_files = [
#             f for f in os.listdir(video_path)
#             if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp'))
#         ]

#         if not image_files:
#             print(f"No image files found in folder: {video_name}")
#             continue

#         def get_sort_key(filename):
#             """Extracts the numerical part for sorting."""
#             match = re.findall(r'(\d+)', os.path.splitext(filename)[0])
#             if match:
#                 # Try to sort by the last numerical part, which is likely the sequence number
#                 return int(match[-1])
#             else:
#                 # If no number is found, sort alphabetically
#                 return filename

#         # Sort image files based on the extracted numerical part
#         sorted_images = sorted(image_files, key=get_sort_key)

#         pdf = FPDF()

#         for image_name in sorted_images:
#             image_path = os.path.join(video_path, image_name)
#             try:
#                 img = Image.open(image_path)
#                 width, height = img.size

#                 # Determine page orientation (landscape if width > height)
#                 orientation = 'P' if height >= width else 'L'
#                 pdf.add_page(orientation=orientation)

#                 # Calculate appropriate dimensions to fit the image on the page
#                 page_width = pdf.w - 2 * pdf.l_margin
#                 page_height = pdf.h - 2 * pdf.b_margin

#                 if orientation == 'P':
#                     ratio = min(page_width / width, page_height / height)
#                 else:
#                     ratio = min(page_height / width, page_width / height)

#                 new_width = width * ratio
#                 new_height = height * ratio

#                 # Calculate position to center the image
#                 x_pos = (page_width - new_width) / 2 + pdf.l_margin
#                 y_pos = (page_height - new_height) / 2 + pdf.b_margin

#                 pdf.image(image_path, x=x_pos, y=y_pos, w=new_width, h=new_height)
#             except Exception as e:
#                 print(f"Error processing image {image_name}: {e}")

#         try:
#             pdf.output(pdf_output_path, "F")
#             print(f"Successfully created PDF: {pdf_filename}")
#         except Exception as e:
#             print(f"Error creating PDF {pdf_filename}: {e}")

In [8]:
import os
import re
from PIL import Image

def natural_key(string):
    """
    A helper function to implement natural sorting.
    Splits the string into a list of integers and non-integer substrings.
    """
    return [int(text) if text.isdigit() else text.lower() for text in re.split('([0-9]+)', string)]

# Define directories
frames_dir = "Frames"
pdfs_dir = "PDFs"

# Ensure the output directory exists
os.makedirs(pdfs_dir, exist_ok=True)

# Iterate over each video folder in the Frames directory
for video_folder in os.listdir(frames_dir):
    video_path = os.path.join(frames_dir, video_folder)
    if os.path.isdir(video_path):
        output_pdf = os.path.join(pdfs_dir, f"{video_folder}.pdf")
        # Skip conversion if the output file already exists
        if os.path.exists(output_pdf):
            print(f"Skipping {output_pdf} (already exists).")
            continue

        # Gather all image files (filtering by common image extensions)
        image_files = [f for f in os.listdir(video_path)
                       if f.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif'))]
        if not image_files:
            print(f"No images found in {video_path}.")
            continue

        # Sort the images using natural sort
        image_files.sort(key=natural_key)

        images = []
        for img_file in image_files:
            img_path = os.path.join(video_path, img_file)
            try:
                with Image.open(img_path) as im:
                    # Convert image to RGB if not already (PDF requires RGB)
                    if im.mode != 'RGB':
                        im = im.convert('RGB')
                    # Create a copy of the image to add to our list
                    images.append(im.copy())
            except Exception as e:
                print(f"Error processing {img_path}: {e}")

        if not images:
            print(f"No valid images in {video_path}.")
            continue

        # Save the images as a single PDF, ensuring each page is exactly the image size
        try:
            first_image, rest = images[0], images[1:]
            first_image.save(output_pdf, "PDF", resolution=100.0, save_all=True, append_images=rest)
            print(f"Saved PDF: {output_pdf}")
        except Exception as e:
            print(f"Error saving PDF for {video_folder}: {e}")

Saved PDF: PDFs/BV1aRZxY5Esn.pdf
Saved PDF: PDFs/BV158HCeZEiK.pdf
Saved PDF: PDFs/BV1zM4m1y75w.pdf
Saved PDF: PDFs/BV1yr421M7SD.pdf
Saved PDF: PDFs/BV1vc411A7kj.pdf
Saved PDF: PDFs/BV1Sg4y1n7Vw.pdf
Saved PDF: PDFs/BV1FZZ4YwEs4.pdf
Saved PDF: PDFs/BV1uGoBYBEKW.pdf
Saved PDF: PDFs/BV1Ur4y1R7au.pdf
Saved PDF: PDFs/BV1UyZ6Y7EnK.pdf
Saved PDF: PDFs/BV1m4ZtY3Egq.pdf
Saved PDF: PDFs/BV1gBSXYaEHp.pdf
Saved PDF: PDFs/BV1AE421g7Ac.pdf
Saved PDF: PDFs/BV1VpZpYzEey.pdf
Saved PDF: PDFs/BV1r96fYGEaX.pdf
Saved PDF: PDFs/BV1Ku411P739.pdf
Saved PDF: PDFs/BV1LtZ4YQEbt.pdf
Saved PDF: PDFs/BV1eKsBeSEij.pdf
Saved PDF: PDFs/BV17KZ8YeEZi.pdf
Saved PDF: PDFs/BV1W1421m7ww.pdf
Saved PDF: PDFs/BV1ZK411C7QU.pdf
Saved PDF: PDFs/BV1bz421y71N.pdf
Saved PDF: PDFs/BV17KZtYdETJ.pdf
Saved PDF: PDFs/BV1fQQ7YQEWJ.pdf
Saved PDF: PDFs/BV1kT42197mG.pdf
Saved PDF: PDFs/BV1YJZrYWEGM.pdf
Saved PDF: PDFs/BV1Uf421B75E.pdf
Saved PDF: PDFs/BV187ZqYgENc.pdf
Saved PDF: PDFs/BV138ZsYkEda.pdf
Saved PDF: PDFs/BV1C3pBebEj5.pdf
Saved PDF:

In [9]:
# if __name__ == "__main__":
#     aggregate_images_to_pdf()
#     print("Image aggregation to PDFs completed.")