In [7]:
import os
import re
from pathlib import Path
from PyPDF2 import PdfMerger

def extract_number(filename):
    """Extract integer from filename with strict validation"""
    match = re.search(r'\d+', filename.name)
    if not match:
        raise ValueError(f"No number found in filename: {filename.name}")
    return int(match.group())

def combine_pdfs(magazine_name):
    desktop = Path.home() / "Desktop/Geo_France"
    pdf_folder = desktop / magazine_name
    
    if not pdf_folder.exists():
        print(f"Folder not found: {pdf_folder}")
        return
    
    # Get list of PDF files
    pdf_files = list(pdf_folder.glob("*.pdf"))
    
    if not pdf_files:
        print("No PDF files found in the folder!")
        return
    
    # Validate and sort files by extracted numbers
    file_number_pairs = []
    for f in pdf_files:
        try:
            num = extract_number(f)
            file_number_pairs.append((num, f))
        except ValueError as e:
            print(e)
            return
    
    # Sort by extracted number
    file_number_pairs.sort(key=lambda x: x[0])
    numbers = [num for num, _ in file_number_pairs]
    
    # Validate sequence: must start at 1, then all even numbers in sequence
    if numbers[0] != 1:
        print(f"ERROR: First page must be 1, found {numbers[0]}")
        return
    
    # Validate the rest of the sequence
    expected_next = 2  # After page 1, we expect page 2
    for num in numbers[1:]:
        # Check if even
        if num % 2 != 0:
            print(f"ERROR: Page {num} is odd. Only the first page can be odd.")
            return
        
        # Check sequence
        if num != expected_next:
            print(f"ERROR: Sequence break! Page {expected_next} is missing. Found {num} instead.")
            return
        
        expected_next = num + 2

    print(desktop)
    
    # Create PDF if all validations pass
    output_pdf = desktop / "Geo_France_Compiled" / f"{magazine_name}.pdf"
    merger = PdfMerger()
    
    for num, pdf in file_number_pairs:
        merger.append(pdf)
        print(f"Added ({num}): {pdf.name}")
    
    merger.write(output_pdf)
    merger.close()
    
    print(f"\nSuccessfully created {output_pdf}")
    print(f"Total pages merged: {len(file_number_pairs)}")
    print(f"First page: {numbers[0]}, Last page: {numbers[-1]}")

if __name__ == "__main__":
    combine_pdfs("Geo_France_2011_12_28")

/Users/fermibot/Desktop/Geo_France
Added (1): 01.pdf
Added (2): 02.pdf
Added (4): 04.pdf
Added (6): 06.pdf
Added (8): 08.pdf
Added (10): 10.pdf
Added (12): 12.pdf
Added (14): 14.pdf
Added (16): 16.pdf
Added (18): 18.pdf
Added (20): 20.pdf
Added (22): 22.pdf
Added (24): 24.pdf
Added (26): 26.pdf
Added (28): 28.pdf
Added (30): 30.pdf
Added (32): 32.pdf
Added (34): 34.pdf
Added (36): 36.pdf
Added (38): 38.pdf
Added (40): 40.pdf
Added (42): 42.pdf
Added (44): 44.pdf
Added (46): 46.pdf
Added (48): 48.pdf
Added (50): 50.pdf
Added (52): 52.pdf
Added (54): 54.pdf
Added (56): 56.pdf
Added (58): 58.pdf
Added (60): 60.pdf
Added (62): 62.pdf
Added (64): 64.pdf
Added (66): 66.pdf
Added (68): 68.pdf
Added (70): 70.pdf
Added (72): 72.pdf
Added (74): 74.pdf
Added (76): 76.pdf
Added (78): 78.pdf
Added (80): 80.pdf
Added (82): 82.pdf
Added (84): 84.pdf
Added (86): 86.pdf
Added (88): 88.pdf
Added (90): 90.pdf
Added (92): 92.pdf
Added (94): 94.pdf
Added (96): 96.pdf
Added (98): 98.pdf
Added (100): 100.pdf