In [1]:
# Install PyMuPDF (imported below as `fitz`) into the environment of the running kernel.
%pip install pymupdf


Note: you may need to restart the kernel to use updated packages.


In [7]:
import fitz  # PyMuPDF
import os

def split_pdf(input_pdf_path, output_dir, pages_per_split=80):
    """
    Splits the PDF into consecutive chunks of at most `pages_per_split` pages.

    Each chunk is saved in `output_dir` as "pages_<first>_to_<last>.pdf", where
    <first> and <last> are the 1-based page numbers of the input document.

    Parameters:
    - input_pdf_path: Path to the input PDF file.
    - output_dir: Directory to save the output PDFs (created if missing).
    - pages_per_split: Maximum number of pages per chunk; must be at least 1.

    Raises:
    - ValueError: If pages_per_split is smaller than 1.
    """
    # Reject bad chunk sizes before touching any file: 0 would only fail inside
    # range() after the input was opened, and a negative size would silently
    # produce no output at all.
    if pages_per_split < 1:
        raise ValueError(f"pages_per_split must be at least 1, got {pages_per_split}")

    pdf_document = fitz.open(input_pdf_path)
    try:
        total_pages = len(pdf_document)

        # exist_ok avoids the check-then-create race of a separate exists() test
        os.makedirs(output_dir, exist_ok=True)

        for start_page in range(0, total_pages, pages_per_split):
            # 0-based inclusive index of the last page in this chunk,
            # clamped so the final chunk stops at the end of the document
            end_page = min(start_page + pages_per_split, total_pages) - 1

            chunk_pdf = fitz.open()  # new, empty PDF
            try:
                chunk_pdf.insert_pdf(pdf_document, from_page=start_page, to_page=end_page)

                # File name uses 1-based page numbers
                output_pdf_path = os.path.join(output_dir, f"pages_{start_page + 1}_to_{end_page + 1}.pdf")
                chunk_pdf.save(output_pdf_path)
            finally:
                # Release the chunk even if inserting or saving failed
                chunk_pdf.close()
    finally:
        # Always release the input document
        pdf_document.close()



In [8]:
# Example usage: split the document into chunks of 80 pages (the default size).
input_pdf_path = "D:\\NTA 8800_2024 nl.pdf"
output_dir = "D:\\Try21"  # backslash escaped: "\T" is not a valid escape sequence
split_pdf(input_pdf_path, output_dir)


# Custom chunks

In [13]:
import fitz  # PyMuPDF
import os

def split_pdf_custom_ranges(input_pdf_path, output_dir, page_ranges):
    """
    Splits the PDF based on custom page ranges.

    Every valid range is saved in `output_dir` as "pages_<start>_to_<end>.pdf".
    A range is skipped, with a printed message, when it starts before page 1,
    ends after the last page, or has its start after its end.

    Parameters:
    - input_pdf_path: Path to the input PDF file.
    - output_dir: Directory to save the output PDFs (created if missing).
    - page_ranges: List of tuples where each tuple contains (start_page, end_page),
      both 1-based and inclusive.
    """
    pdf_document = fitz.open(input_pdf_path)
    try:
        total_pages = len(pdf_document)

        # exist_ok avoids the check-then-create race of a separate exists() test
        os.makedirs(output_dir, exist_ok=True)

        for start_page, end_page in page_ranges:
            # Ensure the start and end pages are within the document's range
            if start_page < 1 or end_page > total_pages or start_page > end_page:
                print(f"Invalid range: {start_page} to {end_page}. Skipping.")
                continue

            chunk_pdf = fitz.open()  # new, empty PDF
            try:
                # Convert the 1-based range to the 0-based indices passed to insert_pdf
                chunk_pdf.insert_pdf(pdf_document, from_page=start_page - 1, to_page=end_page - 1)

                output_pdf_path = os.path.join(output_dir, f"pages_{start_page}_to_{end_page}.pdf")
                chunk_pdf.save(output_pdf_path)
            finally:
                # Release the chunk even if inserting or saving failed
                chunk_pdf.close()
    finally:
        # Always release the input document
        pdf_document.close()




In [17]:
# Example usage: one output PDF per (start_page, end_page) range, 1-based and inclusive.
input_pdf_path = "D:\\NTA 8800_2024 nl.pdf"
output_dir = "D:\\Try21"  # backslash escaped: "\T" is not a valid escape sequence

# Define your custom page ranges here
page_ranges = [
    (1, 9), (10, 10), (11, 18), (19, 49), (50, 67), (68, 124),
    (125, 152), (153, 205), (206, 267), (268, 349), (350, 409), (410, 503),
    (504, 508), (509, 638), (639, 659), (660, 660), (661, 672), (673, 745),
    (746, 750), (751, 752), (753, 769), (770, 776), (777, 794), (795, 798),
    (799, 800), (801, 801), (802, 823), (824, 826), (827, 847), (848, 850),
    (851, 860), (861, 902), (903, 911), (912, 1006), (1007, 1052), (1053, 1054),
    (1055, 1071), (1072, 1089), (1090, 1093), (1094, 1097), (1098, 1108), (1109, 1110),
    (1111, 1112), (1113, 1114), (1115, 1126), (1127, 1131),
]

split_pdf_custom_ranges(input_pdf_path, output_dir, page_ranges)

# Custom pages

In [None]:
import fitz  # PyMuPDF
import os

def extract_specific_pages(input_pdf_path, output_dir, page_numbers):
    """
    Extracts specific pages from the PDF and saves them as a separate PDF.

    Valid pages are copied, in the order given, into a single file named
    "extracted_pages.pdf" inside `output_dir`. Page numbers outside the
    document are skipped with a printed message. If no valid page remains,
    nothing is saved and a message is printed instead.

    Parameters:
    - input_pdf_path: Path to the input PDF file.
    - output_dir: Directory to save the output PDF (created if missing).
    - page_numbers: List of 1-based page numbers to extract.
    """
    pdf_document = fitz.open(input_pdf_path)
    try:
        total_pages = len(pdf_document)

        # exist_ok avoids the check-then-create race of a separate exists() test
        os.makedirs(output_dir, exist_ok=True)

        extracted_pdf = fitz.open()  # new, empty PDF collecting the pages
        try:
            inserted_count = 0
            for page_num in page_numbers:
                # Ensure the page number is within the document's range
                if page_num < 1 or page_num > total_pages:
                    print(f"Invalid page number: {page_num}. Skipping.")
                    continue

                # Convert the 1-based page number to the 0-based index passed to insert_pdf
                extracted_pdf.insert_pdf(pdf_document, from_page=page_num - 1, to_page=page_num - 1)
                inserted_count += 1

            if inserted_count == 0:
                # PyMuPDF refuses to save a document without pages, so report
                # the situation instead of failing inside save().
                print("No valid pages to extract. Nothing was saved.")
                return

            output_pdf_path = os.path.join(output_dir, "extracted_pages.pdf")
            extracted_pdf.save(output_pdf_path)
        finally:
            # Release the output document even if inserting or saving failed
            extracted_pdf.close()
    finally:
        # Always release the input document
        pdf_document.close()




In [None]:
# Example usage: collect individual pages into a single "extracted_pages.pdf".
input_pdf_path = "D:\\NTA 8800_2024 nl.pdf"
output_dir = "D:\\Try21"  # backslash escaped: "\T" is not a valid escape sequence
page_numbers = [3, 6, 9, 22]  # Specify the exact pages you want to extract

extract_specific_pages(input_pdf_path, output_dir, page_numbers)