In [1]:
import os
from PyPDF2 import PdfMerger

# =========================
# Settings
# =========================

# Folder whose top level is scanned for input PDFs.
SOURCE_FOLDER = r"C:\Users\Julian.Diaz\OneDrive - XENITH CONSULTING PTY LTD\Documents\04_eXgen\B\__type___pdf"
# Folder that receives the merged batch PDFs.
DEST_FOLDER = r"C:\Users\Julian.Diaz\OneDrive - XENITH CONSULTING PTY LTD\Documents\04_eXgen\B\__type___pdf\B-for-NotebookLM-pdf"

# Approximate ceiling for one merged PDF, measured as the sum of its input sizes.
MAX_BATCH_MB = 200
# The same ceiling expressed in bytes (1 MB = 1024 ** 2 bytes).
MAX_BATCH_BYTES = MAX_BATCH_MB * 1024 ** 2


# =========================
# Helpers
# =========================

def get_pdf_files_with_sizes(folder):
    """List the PDF files that sit directly inside ``folder``.

    Only the top level of ``folder`` is examined. An entry qualifies when its
    name ends in ``.pdf`` (any letter case) and it is a regular file, so a
    directory that happens to be named ``something.pdf`` is skipped.

    Args:
        folder: Directory to scan.

    Returns:
        A list of ``(full_path, size_bytes)`` tuples ordered by file name,
        compared case-insensitively, so the order is stable between runs.
    """
    # Lazy on purpose: each candidate is joined, checked and measured one at a
    # time, in the order os.listdir yields the names.
    candidates = (
        os.path.join(folder, entry)
        for entry in os.listdir(folder)
        if entry.lower().endswith(".pdf")
    )
    found = [
        (candidate, os.path.getsize(candidate))
        for candidate in candidates
        if os.path.isfile(candidate)
    ]
    return sorted(found, key=lambda item: os.path.basename(item[0]).lower())


def make_batches(pdfs, max_bytes):
    """Group PDFs, in order, into batches whose total size fits ``max_bytes``.

    Files are taken in the order given and packed greedily: a file joins the
    current batch unless that would push the batch total above ``max_bytes``,
    in which case the current batch is closed and a new one is started.

    A single file that is larger than ``max_bytes`` cannot fit anywhere, so it
    is placed alone in its own batch and a warning is printed. The limit is
    approximate because it is applied to the input file sizes, not to the size
    of the merged output.

    Args:
        pdfs: Iterable of ``(path, size_bytes)`` tuples.
        max_bytes: Upper bound for the summed sizes of one batch, in bytes.

    Returns:
        A list of batches; each batch is a list of ``(path, size_bytes)``
        tuples. Input order is preserved across and within batches.
    """
    bytes_per_mb = 1024 ** 2

    batches = []
    current_batch = []
    current_size = 0

    for path, size in pdfs:
        if size > max_bytes:
            # Report the limit actually in force (max_bytes), not a
            # module-level constant, so the message is right for any caller.
            print(
                f"WARNING: '{path}' is larger than {max_bytes / bytes_per_mb:g} MB by itself "
                f"({size / bytes_per_mb:.1f} MB). Placing alone in its own batch."
            )
            # Close the batch in progress first so file order is preserved.
            if current_batch:
                batches.append(current_batch)
                current_batch = []
                current_size = 0
            batches.append([(path, size)])
            continue

        # Start a new batch when this file would overflow the current one.
        # The `current_batch` check avoids emitting an empty batch.
        if current_batch and current_size + size > max_bytes:
            batches.append(current_batch)
            current_batch = []
            current_size = 0

        current_batch.append((path, size))
        current_size += size

    # Flush whatever is left after the last file.
    if current_batch:
        batches.append(current_batch)

    return batches


def merge_batch(batch, output_path):
    """Merge a batch of PDFs into a single PDF at ``output_path``.

    The inputs are appended in the order they appear in ``batch``. The merged
    document is first written to a sibling ``<output_path>.part`` file and only
    moved onto ``output_path`` once the write has finished, so a failure never
    leaves a truncated PDF under the final name and never destroys a previous
    good output.

    Args:
        batch: Iterable of ``(path, size_bytes)`` tuples; only ``path`` is used.
        output_path: Destination file path (``str`` or ``os.PathLike``) for the
            merged PDF.

    Raises:
        Any exception raised while reading the inputs or writing the output is
        propagated after the temporary file has been cleaned up.
    """
    merger = PdfMerger()
    partial_path = os.fspath(output_path) + ".part"
    try:
        for path, _ in batch:
            merger.append(path)
        with open(partial_path, "wb") as f_out:
            merger.write(f_out)
        # os.replace overwrites an existing destination, on Windows as well.
        os.replace(partial_path, output_path)
    except BaseException:
        # Best-effort cleanup of the partial file; the original error is what
        # the caller needs to see, so a failed removal is deliberately ignored.
        try:
            os.remove(partial_path)
        except OSError:
            pass
        raise
    finally:
        merger.close()


# =========================
# Main
# =========================

def main(source_folder=None, dest_folder=None, max_batch_mb=None):
    """Find the PDFs in a folder, group them into batches and merge each batch.

    Progress is reported with ``print``. The destination folder is created if
    it does not exist, and each batch is written there as
    ``NotebookLM_batch_NNN.pdf`` (numbered from 001). The source PDFs are only
    read.

    Args:
        source_folder: Folder to scan for PDFs. Defaults to ``SOURCE_FOLDER``.
        dest_folder: Folder for the merged PDFs. Defaults to ``DEST_FOLDER``.
        max_batch_mb: Approximate size limit per merged PDF, in megabytes.
            Defaults to ``MAX_BATCH_MB``.

    Returns:
        None. Returns early, without creating anything, when no PDF is found.
    """
    # Defaults are resolved at call time so that editing the module-level
    # settings and re-running the cell takes effect without redefining main().
    if source_folder is None:
        source_folder = SOURCE_FOLDER
    if dest_folder is None:
        dest_folder = DEST_FOLDER
    if max_batch_mb is None:
        max_batch_mb = MAX_BATCH_MB
        max_batch_bytes = MAX_BATCH_BYTES
    else:
        max_batch_bytes = int(max_batch_mb * 1024 * 1024)

    print(f"Looking for PDFs in:\n{source_folder}\n")

    pdfs = get_pdf_files_with_sizes(source_folder)
    if not pdfs:
        print("No PDF files found. Exiting.")
        return

    print("Found the following PDF files:")
    for path, size in pdfs:
        print(f" - {os.path.basename(path)} ({size / (1024**2):.2f} MB)")
    print()

    os.makedirs(dest_folder, exist_ok=True)

    batches = make_batches(pdfs, max_batch_bytes)
    print(f"Planned {len(batches)} batch(es) (approx ≤ {max_batch_mb} MB each).\n")

    for i, batch in enumerate(batches, start=1):
        batch_size_bytes = sum(size for _, size in batch)
        out_name = f"NotebookLM_batch_{i:03d}.pdf"
        out_path = os.path.join(dest_folder, out_name)

        print(
            f"Creating {out_name}: "
            f"{len(batch)} file(s), approx {batch_size_bytes / (1024**2):.2f} MB"
        )
        merge_batch(batch, out_path)

    print("\nDone!")
    print(f"Merged PDFs saved in:\n{dest_folder}")
    print("Original PDFs were not modified.")


# Entry point: run the merge when this code executes as the top-level module
# (as a script or as a notebook cell), but not when it is imported.
if __name__ == "__main__":
    main()


Looking for PDFs in:
C:\Users\Julian.Diaz\OneDrive - XENITH CONSULTING PTY LTD\Documents\04_eXgen\B\__type___pdf

Found the following PDF files:
 - 2004_ANNUAL_REPORT.pdf (0.37 MB)
 - ANNUAL_REPORT_ATTACHMENT.pdf (0.01 MB)
 - APPENDIX_1_-_ALTERATION_LEGEND.pdf (0.01 MB)
 - APPENDIX_1_-_LITHOLOGY_LEGEND.pdf (0.01 MB)
 - APPENDIX_1_-_PHASE_ONE_GRAVITY_SURVEY_REPORT.pdf (0.51 MB)
 - APPENDIX_1_-_REPROCESSED_REGIONAL_MAGNETIC_IMAGERY.pdf (7.02 MB)
 - APPENDIX_1_-_REPROCESSED_REGIONAL_MAGNETIC_IMAGERY__1.pdf (6.33 MB)
 - APPENDIX_1_-_REPROCESSED_REGIONAL_MAGNETIC_IMAGERY__2.pdf (7.39 MB)
 - APPENDIX_1_-_REPROCESSED_REGIONAL_MAGNETIC_IMAGERY__3.pdf (6.53 MB)
 - APPENDIX_1_-_SEEBASE_STUDY_AND_GIS_GEOPHYSICS_FROGTECH_GEOSCIENCE.pdf (60.98 MB)
 - APPENDIX_2_-_ANALYSIS_SPECIFICATIONS.pdf (0.15 MB)
 - APPENDIX_2_-_GEOPHYSICS_-_EM.pdf (0.07 MB)
 - APPENDIX_2_-_PHASE_TWO_GRAVITY_SURVEY_REPORT.pdf (12.60 MB)
 - APPENDIX_2_-_REPROCESSED_GRAVITY_IMAGERY.pdf (28.40 MB)
 - APPENDIX_2_-_REPROCESSED_REGIO

Invalid stream (index 0) within object 89 0: Stream has ended unexpectedly
Invalid stream (index 0) within object 65 0: Stream has ended unexpectedly
Invalid stream (index 0) within object 66 0: Stream has ended unexpectedly
Invalid stream (index 0) within object 67 0: Stream has ended unexpectedly
Invalid stream (index 0) within object 68 0: Stream has ended unexpectedly
Invalid stream (index 0) within object 69 0: Stream has ended unexpectedly
Invalid stream (index 0) within object 70 0: Stream has ended unexpectedly
Invalid stream (index 0) within object 71 0: Stream has ended unexpectedly
Invalid stream (index 0) within object 72 0: Stream has ended unexpectedly
Invalid stream (index 0) within object 73 0: Stream has ended unexpectedly
Invalid stream (index 0) within object 74 0: Stream has ended unexpectedly
Invalid stream (index 0) within object 75 0: Stream has ended unexpectedly
Invalid stream (index 0) within object 76 0: Stream has ended unexpectedly
Invalid stream (index 0) 

Creating NotebookLM_batch_008.pdf: 25 file(s), approx 116.47 MB

Done!
Merged PDFs saved in:
C:\Users\Julian.Diaz\OneDrive - XENITH CONSULTING PTY LTD\Documents\04_eXgen\B\__type___pdf\B-for-NotebookLM-pdf
Original PDFs were not modified.
