<a href="https://colab.research.google.com/github/drfperez/utilities/blob/main/PDFTool.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

import os
import nbformat

# ---------------- CONFIG ----------------
import shutil

# Folder containing your notebooks (scanned non-recursively)
notebook_folder = "."  # current folder; change if needed

# Optional: copy each notebook to backup_folder before it is modified
backup = True
backup_folder = "nb_backup"

# Notebook format version to load as. 4 is the newest major version that
# nbformat defines; there is no version 5 for nbformat.read() to convert to.
nb_version = 4

# ---------------- SCRIPT ----------------
if backup:
    os.makedirs(backup_folder, exist_ok=True)

for filename in os.listdir(notebook_folder):
    if not filename.endswith(".ipynb"):
        continue

    nb_path = os.path.join(notebook_folder, filename)
    print(f"Processing: {filename}")

    # Backup. An existing backup is never overwritten, so the copy that is
    # kept is the one taken before the first modification.
    if backup:
        backup_path = os.path.join(backup_folder, filename)
        if not os.path.exists(backup_path):
            shutil.copy(nb_path, backup_path)

    # Load notebook
    nb = nbformat.read(nb_path, as_version=nb_version)
    changed = False

    # Fix widgets metadata: every cell-level "widgets" entry gets a "state" key
    for cell in nb.cells:
        widgets_meta = cell.metadata.get("widgets")
        if widgets_meta is not None and "state" not in widgets_meta:
            widgets_meta["state"] = {}
            changed = True

    # Only rewrite the file when something was actually altered
    if changed:
        nbformat.write(nb, nb_path)
        print("  Fixed widget metadata ‚úî")
    else:
        print("  No changes needed")

print("\nAll notebooks processed!")
print("You can now use nbconvert without 'metadata.widgets' errors.")


All notebooks processed!
You can now use nbconvert without 'metadata.widgets' errors.


In [None]:
# ---------------------- CODE START ----------------------
!pip install -q pikepdf ipywidgets

import os, re, time, math
import pikepdf
from google.colab import files
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output

# ---------------- Upload PDFs ----------------
# The upload result is keyed by file name; the key order becomes the order
# in which the merge action walks the files.
print("Upload one or more PDF files (order matters for merge).")
uploaded = files.upload()
if not uploaded:
    raise SystemExit("No PDFs uploaded.")

pdf_files = [*uploaded.keys()]
print("Uploaded PDFs:")
for f in pdf_files:
    print(" -", f)

# ---------------- Widgets ----------------
# Action selector: each entry is (label shown to the user, internal value).
_actions = [
    ("Merge PDFs", "merge"),
    ("Split PDF (every page)", "split_all"),
    ("Split PDF (page ranges)", "split_ranges"),
    ("Extract / reorder pages", "extract"),
    ("Compress PDF", "compress"),
]
mode = widgets.Dropdown(options=_actions, description="Action:")

# Page specification read by the "split_ranges" and "extract" actions.
ranges_text = widgets.Text(
    description="Pages:",
    value="1-3,5,7-9",
    placeholder="Used for ranges/extract",
)

# Target-size presets in MB; the "custom" entry defers to custom_size.
_size_presets = [
    ("Small (max 1MB)", 1),
    ("Medium (max 2MB)", 2),
    ("Large (max 5MB)", 5),
    ("Very Small (max 500KB)", 0.5),
    ("Custom size", "custom"),
]
compress_size = widgets.Dropdown(
    options=_size_presets,
    value=2,
    description="Target size:",
    disabled=False,
)

# Free-form target size; starts disabled.
custom_size = widgets.FloatText(
    description="Custom size (MB):",
    value=1.0,
    disabled=True,
    style={'description_width': 'initial'},
)

# Trigger button and the output area the action handler prints into.
run_btn = widgets.Button(button_style="success", description="Run")
out = widgets.Output()

# NEW: Enable the compression controls only while they are relevant
def update_widgets(change):
    """
    Sync the enabled state of the compression widgets with the chosen action.

    change: change dict delivered by the ``mode`` observer; only
        change['new'] (the newly selected action value) is read.
    """
    compressing = change['new'] == 'compress'
    compress_size.disabled = not compressing
    # The custom box is usable only for compress + "Custom size"
    custom_size.disabled = not (compressing and compress_size.value == 'custom')


def _on_compress_size_change(change):
    """Enable the custom-size box when 'Custom size' is picked while compressing."""
    custom_size.disabled = not (mode.value == 'compress' and change['new'] == 'custom')


mode.observe(update_widgets, names='value')
# Without this observer the custom-size box could never become enabled by
# choosing "Custom size" after "Compress PDF" was already selected.
compress_size.observe(_on_compress_size_change, names='value')

# Apply the rules once so the initial state matches the default action
update_widgets({'new': mode.value})

# Display all widgets
display(mode, ranges_text, compress_size, custom_size, run_btn, out)

# ---------------- Helpers ----------------
def parse_ranges(ranges, max_pages):
    """
    Parse a page specification such as "1-3,5,7-9".

    ranges: comma-separated 1-based page numbers and inclusive "a-b" ranges.
        Whitespace around items and empty items (for example a trailing
        comma) are ignored.
    max_pages: number of pages in the document; indices outside
        0..max_pages-1 are dropped from the result.

    Returns the 0-based page indices in the order given (duplicates kept).
    Raises ValueError naming the offending item when an item is neither a
    number nor an "a-b" pair.
    """
    pages = []
    for part in ranges.split(","):
        part = part.strip()
        if not part:
            continue
        try:
            if "-" in part:
                first, last = part.split("-")
                pages.extend(range(int(first) - 1, int(last)))
            else:
                pages.append(int(part) - 1)
        except ValueError:
            # Covers non-numeric text as well as items with more than one "-"
            raise ValueError(f"Invalid page range: {part!r}") from None
    return [p for p in pages if 0 <= p < max_pages]

def download_link(path):
    """
    Show a download link for ``path`` and try to start a browser download.

    path: name of the file to offer, as passed to files.download().

    The automatic download is best effort: when it fails the failure is
    reported with print() and the link stays available.
    """
    from html import escape
    from urllib.parse import quote

    # Escape the name so characters such as quotes, '<' or '&' cannot break
    # the generated markup.
    href = escape(f"files/{quote(path)}", quote=True)
    display(HTML(f"<a href='{href}' target='_blank'>Download {escape(path)}</a>"))
    try:
        files.download(path)
    except Exception as exc:  # best effort; never abort the action over this
        print(f"Automatic download failed ({exc}); use the link above.")

# NEW: Compression helper functions
def _recompress_jpeg_images(pdf, quality):
    """
    Re-encode the JPEG images on the pages of ``pdf`` at ``quality``.

    Only images whose sole filter is /DCTDecode and whose decoded mode is
    RGB or L are considered, and an image is replaced only when the
    re-encoded data is smaller than what is stored. An image that cannot be
    processed is left unchanged. Returns the number of images replaced.
    """
    import io

    replaced = 0
    seen = set()  # an image shared by several pages is handled once
    for page in pdf.pages:
        for _name, raw in page.images.items():
            try:
                if raw.objgen in seen:
                    continue
                seen.add(raw.objgen)

                pdf_image = pikepdf.PdfImage(raw)
                if [str(f) for f in pdf_image.filters] != ['/DCTDecode']:
                    continue
                pil_image = pdf_image.as_pil_image()
                if pil_image.mode not in ('RGB', 'L'):
                    continue  # leave CMYK and other modes untouched

                buffer = io.BytesIO()
                pil_image.save(buffer, format='JPEG', quality=int(quality), optimize=True)
                data = buffer.getvalue()
                if len(data) < len(raw.read_raw_bytes()):
                    raw.write(data, filter=pikepdf.Name.DCTDecode)
                    replaced += 1
            except Exception:
                # Best effort: skip this image and keep going
                continue
    return replaced


def compress_pdf(input_path, output_path, target_size_mb, max_quality=90, min_quality=10):
    """
    Compress PDF by reducing image quality to achieve target file size

    input_path: PDF to read; it is reopened for every attempt so quality
        loss does not accumulate between attempts
    output_path: file to write (overwritten by each attempt)
    target_size_mb: Target size in megabytes
    max_quality: Starting quality (0-100)
    min_quality: Minimum quality to try (0-100)

    Quality is lowered in steps of 10 until the output fits the target or
    min_quality has been tried. Progress is reported with print().
    Returns output_path; the file may still exceed the target, in which case
    a warning is printed.
    """
    mb = 1024 * 1024
    original_size = os.path.getsize(input_path) / mb

    if original_size <= target_size_mb:
        # Nothing lossy needed: just re-save with stream compression
        print(f"PDF is already {original_size:.2f}MB (target: {target_size_mb}MB)")
        with pikepdf.open(input_path) as pdf:
            pdf.save(output_path, compress_streams=True, object_stream_mode=pikepdf.ObjectStreamMode.compress)
        return output_path

    print(f"Original size: {original_size:.2f}MB, Target: {target_size_mb}MB")

    # Try different compression levels. Starting at least at min_quality
    # guarantees one attempt, so output_path always exists afterwards.
    step = 10
    quality = max(max_quality, min_quality)

    while True:
        print(f"Trying quality: {quality}%")

        with pikepdf.open(input_path) as pdf:
            _recompress_jpeg_images(pdf, quality)

            # Save with compression
            pdf.save(
                output_path,
                compress_streams=True,
                object_stream_mode=pikepdf.ObjectStreamMode.compress,
                stream_decode_level=pikepdf.StreamDecodeLevel.specialized
            )

        compressed_size = os.path.getsize(output_path) / mb
        reduction = ((original_size - compressed_size) / original_size) * 100

        print(f"  Result: {compressed_size:.2f}MB ({reduction:.1f}% reduction)")

        # Stop on success, or when the next step would go below min_quality
        if compressed_size <= target_size_mb or quality - step < min_quality:
            break

        # Reduce quality for next attempt
        quality -= step

    final_size = os.path.getsize(output_path) / mb
    if final_size > target_size_mb:
        print(f"Warning: Could not achieve target size. Best: {final_size:.2f}MB")
    else:
        print(f"Success: Compressed to {final_size:.2f}MB")

    return output_path

# ---------------- Main Logic ----------------
def _save_selected_pages(src, suffix, ts):
    """
    Copy the pages named in ranges_text from ``src`` into a new PDF.

    The output is written as "<src without extension>_<suffix>_<ts>.pdf".
    Returns the output name, or None when the selection contains no valid
    page (nothing is written in that case).
    """
    with pikepdf.open(src) as pdf:
        pages = parse_ranges(ranges_text.value, len(pdf.pages))
        if not pages:
            return None
        out_name = f"{os.path.splitext(src)[0]}_{suffix}_{ts}.pdf"
        out_pdf = pikepdf.Pdf.new()
        for p in pages:
            out_pdf.pages.append(pdf.pages[p])
        # Saved while the source is still open
        out_pdf.save(out_name)
    return out_name


def run_action(b):
    """
    Button callback: run the action selected in ``mode``.

    b: the clicked button (unused).

    All messages go to the ``out`` output widget. Merge uses every uploaded
    file in upload order; every other action works on the first uploaded
    file only.
    """
    from contextlib import ExitStack

    with out:
        clear_output(wait=True)
        ts = int(time.time())
        action = mode.value

        if action == "merge":
            print("Merging PDFs...")
            out_name = f"merged_{ts}.pdf"
            merged = pikepdf.Pdf.new()
            # Keep every source open until the merged file has been written:
            # page data copied from another Pdf may still be read from its
            # source during save().
            with ExitStack() as sources:
                for f in pdf_files:
                    pdf = sources.enter_context(pikepdf.open(f))
                    merged.pages.extend(pdf.pages)
                merged.save(out_name)
            print("Done:", out_name)
            download_link(out_name)

        elif action == "split_all":
            print("Splitting PDF into single pages...")
            src = pdf_files[0]
            with pikepdf.open(src) as pdf:
                for i, page in enumerate(pdf.pages):
                    out_name = f"{os.path.splitext(src)[0]}_page_{i+1}.pdf"
                    single = pikepdf.Pdf.new()
                    single.pages.append(page)
                    single.save(out_name)
                    print("Created:", out_name)
            print("All pages split. Download from file list (left panel).")

        elif action in ("split_ranges", "extract"):
            # Both actions copy the listed pages, in the listed order; they
            # differ only in the message and the output file suffix.
            if action == "split_ranges":
                print("Splitting PDF by ranges...")
                suffix = "ranges"
            else:
                print("Extracting / reordering pages...")
                suffix = "extract"
            try:
                out_name = _save_selected_pages(pdf_files[0], suffix, ts)
            except ValueError as exc:
                print(f"Invalid page selection: {exc}")
                return
            if out_name is None:
                print("No valid pages selected; nothing was written.")
                return
            print("Done:", out_name)
            download_link(out_name)

        # NEW: Compression action
        elif action == "compress":
            print("Compressing PDF...")
            src = pdf_files[0]

            # Get target size
            if compress_size.value == "custom":
                target_size = custom_size.value
            else:
                target_size = compress_size.value

            if target_size <= 0:
                print("Target size must be greater than 0 MB.")
                return

            out_name = f"{os.path.splitext(src)[0]}_compressed_{ts}.pdf"

            # Apply compression
            compress_pdf(src, out_name, target_size)

            print("Compression completed!")
            download_link(out_name)

run_btn.on_click(run_action)

# ---------------- Instructions ----------------
# Usage summary printed once when the cell finishes running.
_how_to_use = """
HOW TO USE:
- MERGE: upload multiple PDFs ‚Üí choose 'Merge PDFs' ‚Üí Run
- SPLIT ALL: upload ONE PDF ‚Üí choose 'Split PDF (every page)' ‚Üí Run
- SPLIT RANGES: upload ONE PDF ‚Üí choose 'Split PDF (page ranges)'
  Example: 1-3,5,8-10
- EXTRACT / REORDER:
  Example: 5,1,3-4 (output will follow this order)
- COMPRESS: upload ONE PDF ‚Üí choose 'Compress PDF' ‚Üí Select target size ‚Üí Run
  Note: Compression works best on PDFs with images
"""
print(_how_to_use)
# ---------------------- CODE END ----------------------

In [None]:

# Single-cell Google Colab PDF Merger / Splitter / Page Editor
# Features:
# - Upload one or more PDFs
# - Merge PDFs (in chosen order)
# - Split PDF (every page or page ranges)
# - Extract / reorder pages
# - Download result
#
# Paste this entire cell into Google Colab and run.

# ---------------------- CODE START ----------------------
!pip install -q pikepdf ipywidgets

import os, re, time
import pikepdf
from google.colab import files
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output

# ---------------- Upload PDFs ----------------
# The upload result is keyed by file name; the key order becomes the order
# in which the merge action walks the files.
print("Upload one or more PDF files (order matters for merge).")
uploaded = files.upload()
if not uploaded:
    raise SystemExit("No PDFs uploaded.")

pdf_files = [*uploaded.keys()]
print("Uploaded PDFs:")
for f in pdf_files:
    print(" -", f)

# ---------------- Widgets ----------------
# Action selector: each entry is (label shown to the user, internal value).
_actions = [
    ("Merge PDFs", "merge"),
    ("Split PDF (every page)", "split_all"),
    ("Split PDF (page ranges)", "split_ranges"),
    ("Extract / reorder pages", "extract"),
]
mode = widgets.Dropdown(options=_actions, description="Action:")

# Page specification read by the "split_ranges" and "extract" actions.
ranges_text = widgets.Text(
    description="Pages:",
    value="1-3,5,7-9",
    placeholder="Used for ranges/extract",
)

# Trigger button and the output area the action handler prints into.
run_btn = widgets.Button(button_style="success", description="Run")
out = widgets.Output()

display(mode, ranges_text, run_btn, out)

# ---------------- Helpers ----------------
def parse_ranges(ranges, max_pages):
    """
    Parse a page specification such as "1-3,5,7-9".

    ranges: comma-separated 1-based page numbers and inclusive "a-b" ranges.
        Whitespace around items and empty items (for example a trailing
        comma) are ignored.
    max_pages: number of pages in the document; indices outside
        0..max_pages-1 are dropped from the result.

    Returns the 0-based page indices in the order given (duplicates kept).
    Raises ValueError naming the offending item when an item is neither a
    number nor an "a-b" pair.
    """
    pages = []
    for part in ranges.split(","):
        part = part.strip()
        if not part:
            continue
        try:
            if "-" in part:
                first, last = part.split("-")
                pages.extend(range(int(first) - 1, int(last)))
            else:
                pages.append(int(part) - 1)
        except ValueError:
            # Covers non-numeric text as well as items with more than one "-"
            raise ValueError(f"Invalid page range: {part!r}") from None
    return [p for p in pages if 0 <= p < max_pages]

def download_link(path):
    """
    Show a download link for ``path`` and try to start a browser download.

    path: name of the file to offer, as passed to files.download().

    The automatic download is best effort: when it fails the failure is
    reported with print() and the link stays available.
    """
    from html import escape
    from urllib.parse import quote

    # Escape the name so characters such as quotes, '<' or '&' cannot break
    # the generated markup.
    href = escape(f"files/{quote(path)}", quote=True)
    display(HTML(f"<a href='{href}' target='_blank'>Download {escape(path)}</a>"))
    try:
        files.download(path)
    except Exception as exc:  # best effort; never abort the action over this
        print(f"Automatic download failed ({exc}); use the link above.")

# ---------------- Main Logic ----------------
def _save_selected_pages(src, suffix, ts):
    """
    Copy the pages named in ranges_text from ``src`` into a new PDF.

    The output is written as "<src without extension>_<suffix>_<ts>.pdf".
    Returns the output name, or None when the selection contains no valid
    page (nothing is written in that case).
    """
    with pikepdf.open(src) as pdf:
        pages = parse_ranges(ranges_text.value, len(pdf.pages))
        if not pages:
            return None
        out_name = f"{os.path.splitext(src)[0]}_{suffix}_{ts}.pdf"
        out_pdf = pikepdf.Pdf.new()
        for p in pages:
            out_pdf.pages.append(pdf.pages[p])
        # Saved while the source is still open
        out_pdf.save(out_name)
    return out_name


def run_action(b):
    """
    Button callback: run the action selected in ``mode``.

    b: the clicked button (unused).

    All messages go to the ``out`` output widget. Merge uses every uploaded
    file in upload order; every other action works on the first uploaded
    file only.
    """
    from contextlib import ExitStack

    with out:
        clear_output(wait=True)
        ts = int(time.time())
        action = mode.value

        if action == "merge":
            print("Merging PDFs...")
            out_name = f"merged_{ts}.pdf"
            merged = pikepdf.Pdf.new()
            # Keep every source open until the merged file has been written:
            # page data copied from another Pdf may still be read from its
            # source during save().
            with ExitStack() as sources:
                for f in pdf_files:
                    pdf = sources.enter_context(pikepdf.open(f))
                    merged.pages.extend(pdf.pages)
                merged.save(out_name)
            print("Done:", out_name)
            download_link(out_name)

        elif action == "split_all":
            print("Splitting PDF into single pages...")
            src = pdf_files[0]
            with pikepdf.open(src) as pdf:
                for i, page in enumerate(pdf.pages):
                    out_name = f"{os.path.splitext(src)[0]}_page_{i+1}.pdf"
                    single = pikepdf.Pdf.new()
                    single.pages.append(page)
                    single.save(out_name)
                    print("Created:", out_name)
            print("All pages split. Download from file list (left panel).")

        elif action in ("split_ranges", "extract"):
            # Both actions copy the listed pages, in the listed order; they
            # differ only in the message and the output file suffix.
            if action == "split_ranges":
                print("Splitting PDF by ranges...")
                suffix = "ranges"
            else:
                print("Extracting / reordering pages...")
                suffix = "extract"
            try:
                out_name = _save_selected_pages(pdf_files[0], suffix, ts)
            except ValueError as exc:
                print(f"Invalid page selection: {exc}")
                return
            if out_name is None:
                print("No valid pages selected; nothing was written.")
                return
            print("Done:", out_name)
            download_link(out_name)

run_btn.on_click(run_action)

# ---------------- Instructions ----------------
# Usage summary printed once when the cell finishes running.
_how_to_use = """
HOW TO USE:
- MERGE: upload multiple PDFs ‚Üí choose 'Merge PDFs' ‚Üí Run
- SPLIT ALL: upload ONE PDF ‚Üí choose 'Split PDF (every page)' ‚Üí Run
- SPLIT RANGES: upload ONE PDF ‚Üí choose 'Split PDF (page ranges)'
  Example: 1-3,5,8-10
- EXTRACT / REORDER:
  Example: 5,1,3-4 (output will follow this order)
"""
print(_how_to_use)
# ---------------------- CODE END ----------------------

Extracting / reordering pages...
Done: openair_book_complete_extract_1770703008.pdf


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:

# ---------------------- CODE START ----------------------
# Install required packages
!apt-get update -qq
!apt-get install -y -qq wkhtmltopdf
!pip install -q pdfkit requests beautifulsoup4 PyPDF2 pikepdf ipywidgets pillow

import os, re, time, math, io
import pikepdf
from PyPDF2 import PdfMerger, PdfReader, PdfWriter
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output, Javascript
from google.colab import files, drive
from PIL import Image
import numpy as np

# Mount Google Drive
print("Mounting Google Drive...")
drive.mount('/content/drive')
print("‚úì Google Drive mounted!")

# ---------------- File Selection Widgets ----------------
print("\nüìÅ Please select your working folder in Google Drive")

# Folder navigation controls: status output, multi-select of PDF names,
# a "list files" button and the text box holding the folder path.
out_info = widgets.Output()

file_list = widgets.SelectMultiple(
    description='PDF Files:',
    options=[],
    layout=widgets.Layout(height='200px', width='100%'),
)

refresh_btn = widgets.Button(
    description='üìÇ List Files',
    button_style='info',
    layout=widgets.Layout(width='20%'),
)

folder_path = widgets.Text(
    description='Path:',
    value='/content/drive/MyDrive/',
    layout=widgets.Layout(width='80%'),
)

# Display folder navigation widgets
_button_row = widgets.HBox([refresh_btn])
display(folder_path, _button_row, file_list, out_info)

def list_files_in_folder(path):
    """
    Return the sorted names of the regular files in ``path`` whose name ends
    in ".pdf" (case-insensitive).

    Any exception is reported with print() and an empty list is returned.
    """
    try:
        return sorted(
            entry
            for entry in os.listdir(path)
            if os.path.isfile(os.path.join(path, entry))
            and entry.lower().endswith('.pdf')
        )
    except Exception as e:
        print(f"Error listing files: {e}")
        return []

def refresh_files(b):
    """
    Reload the PDF list for the folder entered in ``folder_path``.

    b: the clicked button, or None when called directly (unused).

    Messages go to the ``out_info`` output widget; ``file_list.options`` is
    replaced with the PDF names found (empty when there are none). When the
    path does not exist only a message is printed.
    """
    with out_info:
        clear_output(wait=True)
        target = folder_path.value

        if not os.path.exists(target):
            print(f"‚ùå Path doesn't exist: {target}")
            return

        print(f"üìÅ Listing files in: {target}")
        found = list_files_in_folder(target)
        file_list.options = found

        if not found:
            print("‚ùå No PDF files found in this folder")
            return

        print(f"‚úÖ Found {len(found)} PDF file(s)")
        for name in found:
            print(f"   ‚Ä¢ {name}")

refresh_btn.on_click(refresh_files)

# Initial file listing
refresh_files(None)

# ---------------- Operation Widgets ----------------
_banner_rule = "=" * 60
print("\n" + _banner_rule)
print("üìã PDF OPERATION SELECTION")
print(_banner_rule)


def _half_width():
    """Return a fresh 50%-wide layout (one Layout instance per widget)."""
    return widgets.Layout(width='50%')


# Action selector: each entry is (label shown to the user, internal value).
_operations = [
    ("üìÑ View PDF Info", "info"),
    ("üîÑ Merge PDFs", "merge"),
    ("‚úÇÔ∏è Split PDF (every page)", "split_all"),
    ("üî¢ Split PDF (page ranges)", "split_ranges"),
    ("üìë Extract / Reorder Pages", "extract"),
    ("üóúÔ∏è Compress PDF", "compress"),
    ("üñºÔ∏è Convert to Images", "to_images"),
    ("üìÑ Images to PDF", "images_to_pdf"),
]
mode = widgets.Dropdown(
    options=_operations,
    value="info",
    description="Action:",
    layout=_half_width(),
)

# Page ranges input
ranges_text = widgets.Text(
    description="Pages:",
    value="1-3,5,7-9",
    placeholder="e.g., 1-3,5,7-9",
    layout=_half_width(),
)

# Compression settings
_compression_kinds = [
    ("Size-based (target MB)", "size"),
    ("Quality-based (0-100%)", "quality"),
    ("Aggressive (max compression)", "aggressive"),
]
compress_mode = widgets.Dropdown(
    options=_compression_kinds,
    value="size",
    description="Compression:",
    layout=_half_width(),
)

target_size = widgets.FloatSlider(
    description="Target (MB):",
    value=2.0,
    min=0.1,
    max=20.0,
    step=0.1,
    disabled=False,
    layout=_half_width(),
)

quality_level = widgets.IntSlider(
    description="Quality (%):",
    value=75,
    min=10,
    max=95,
    step=5,
    disabled=True,
    layout=_half_width(),
)

# Image conversion settings
_dpi_choices = [
    ("Standard (150 DPI)", 150),
    ("High (300 DPI)", 300),
    ("Low (72 DPI)", 72),
]
dpi_setting = widgets.Dropdown(
    options=_dpi_choices,
    value=150,
    description="Image DPI:",
    disabled=True,
    layout=_half_width(),
)

# Trigger button and the output area for operation messages
run_btn = widgets.Button(
    description="üöÄ Run Operation",
    button_style='success',
    layout=widgets.Layout(width='200px'),
)

output_display = widgets.Output()

# Update widget availability based on the selected action
def _sync_compression_sliders(compress_kind):
    """Enable the target-size slider for 'size' compression, the quality slider otherwise."""
    size_based = compress_kind == 'size'
    target_size.disabled = not size_based
    quality_level.disabled = size_based


def update_widgets(change):
    """
    Enable only the option widgets that the selected action uses.

    change: change dict delivered by the ``mode`` observer; only
        change['new'] (the newly selected action value) is read.

    - 'compress' enables compress_mode plus the slider matching its value
    - 'split_ranges', 'extract' and 'to_images' enable ranges_text
    - 'to_images' additionally enables dpi_setting
    - every other action disables all of them
    """
    action = change['new']
    compressing = action == 'compress'

    ranges_text.disabled = action not in ('split_ranges', 'extract', 'to_images')
    dpi_setting.disabled = action != 'to_images'
    compress_mode.disabled = not compressing
    if compressing:
        _sync_compression_sliders(compress_mode.value)
    else:
        target_size.disabled = True
        quality_level.disabled = True


mode.observe(update_widgets, names='value')


def update_compress_widgets(change):
    """Switch between the size and quality sliders when compress_mode changes."""
    _sync_compression_sliders(change['new'])


compress_mode.observe(update_compress_widgets, names='value')

# The widgets are created with fixed enabled/disabled flags; apply the rules
# once so the initial state matches the default action.
update_widgets({'new': mode.value})

# Display all widgets: three rows of paired controls, then the run button
# and the output area stacked underneath.
_control_rows = [
    widgets.HBox([mode, ranges_text]),
    widgets.HBox([compress_mode, target_size]),
    widgets.HBox([quality_level, dpi_setting]),
]
display(widgets.VBox(_control_rows + [run_btn, output_display]))

# ---------------- Helper Functions ----------------
def get_full_path(filename):
    """Join ``filename`` onto the folder currently entered in the ``folder_path`` widget."""
    base_dir = folder_path.value
    return os.path.join(base_dir, filename)

def parse_ranges(ranges_str, max_pages):
    """
    Parse page ranges like: 1-3,5,7-9

    ranges_str: comma-separated 1-based page numbers and inclusive "a-b"
        ranges. A blank string selects every page.
    max_pages: number of pages in the document. Ranges are clamped to
        1..max_pages; single pages outside that span are ignored.

    Malformed items are skipped. Returns 0-based page indices in the order
    they were requested, with repeated pages kept only at their first
    position.
    """
    if not ranges_str.strip():
        return list(range(max_pages))

    pages = []
    for part in ranges_str.split(","):
        part = part.strip()
        if "-" in part:
            try:
                a, b = part.split("-")
                start = max(1, int(a))
                end = min(max_pages, int(b))
            except ValueError:
                continue  # not exactly two integers around one "-"
            pages.extend(range(start - 1, end))
        elif part.isdecimal():
            # isdecimal() only accepts characters that int() can convert
            page_num = int(part)
            if 1 <= page_num <= max_pages:
                pages.append(page_num - 1)
    # dict.fromkeys removes duplicates but, unlike set(), keeps the
    # requested order, which the reorder action depends on
    return list(dict.fromkeys(pages))

def get_pdf_info(pdf_path):
    """
    Collect human-readable facts about a PDF as a list of text lines.

    The lines cover file name, size in MB, page count, encryption flag and,
    when present, the metadata entries whose key starts with "/". If
    anything raises, an "Error reading PDF: ..." line is appended to
    whatever was gathered so far.
    """
    lines = []
    try:
        with open(pdf_path, 'rb') as handle:
            reader = PdfReader(handle)
            lines.append(f"File: {os.path.basename(pdf_path)}")
            lines.append(f"Size: {os.path.getsize(pdf_path) / (1024*1024):.2f} MB")
            lines.append(f"Pages: {len(reader.pages)}")
            encrypted_label = 'Yes' if reader.is_encrypted else 'No'
            lines.append(f"Encrypted: {encrypted_label}")

            # Get metadata
            doc_meta = reader.metadata
            if doc_meta:
                lines.append("\nMetadata:")
                for name, content in doc_meta.items():
                    if not name.startswith('/'):
                        continue
                    lines.append(f"  {name[1:]}: {content}")
    except Exception as e:
        lines.append(f"Error reading PDF: {e}")
    return lines

def compress_pdf_size_based(input_path, output_path, target_mb):
    """
    Re-save a PDF with stream compression and report whether it fits.

    input_path: PDF to read.
    output_path: file to write.
    target_mb: desired maximum size in megabytes.

    Returns (fits, size_mb). When the input already fits, it is re-saved
    with basic stream compression and (True, original size) is returned.
    Otherwise it is re-saved once with compressed object streams and
    generalized stream decoding, and the size of the written file is
    compared against target_mb.

    The save settings do not depend on any quality value, so repeated
    attempts would produce the same file; a single pass is made.
    """
    mb = 1024 * 1024
    original_size = os.path.getsize(input_path) / mb

    if original_size <= target_mb:
        # Just copy with basic compression
        with pikepdf.open(input_path) as pdf:
            pdf.save(output_path, compress_streams=True)
        return True, original_size

    with pikepdf.open(input_path) as pdf:
        pdf.save(
            output_path,
            compress_streams=True,
            object_stream_mode=pikepdf.ObjectStreamMode.compress,
            stream_decode_level=pikepdf.StreamDecodeLevel.generalized
        )

    new_size = os.path.getsize(output_path) / mb
    return new_size <= target_mb, new_size

def compress_pdf_quality_based(input_path, output_path, quality):
    """
    Re-save the PDF with compressed streams and generated object streams.

    ``quality`` is accepted but is not referenced by this function.
    Always returns True.
    """
    save_options = {
        'compress_streams': True,
        'object_stream_mode': pikepdf.ObjectStreamMode.generate,
    }
    with pikepdf.open(input_path) as source:
        source.save(output_path, **save_options)
    return True

def pdf_to_images(pdf_path, output_folder, dpi=150, pages=None):
    """
    Convert PDF pages to JPEG images.

    pdf_path: PDF to render.
    output_folder: directory for the images (created if missing).
    dpi: rendering resolution passed to pdf2image.
    pages: iterable of 0-based page indices to export, or None for all.

    Files are named "page_NNN.jpg" using the 1-based page number and saved
    at JPEG quality 85. Returns the list of written paths in page order.

    When ``pages`` is given, only the span from the lowest to the highest
    requested page is rendered instead of the whole document.
    """
    from pdf2image import convert_from_path
    import os

    os.makedirs(output_folder, exist_ok=True)

    if pages is None:
        wanted = None
        first_index = 0
        images = convert_from_path(pdf_path, dpi=dpi)
    else:
        # Negative indices can never match a page, so drop them up front
        wanted = {p for p in pages if p >= 0}
        if not wanted:
            return []
        first_index = min(wanted)
        images = convert_from_path(
            pdf_path,
            dpi=dpi,
            first_page=first_index + 1,   # pdf2image page numbers are 1-based
            last_page=max(wanted) + 1,
        )

    saved_images = []
    for i, image in enumerate(images, start=first_index):
        if wanted is None or i in wanted:
            img_path = os.path.join(output_folder, f"page_{i+1:03d}.jpg")
            image.save(img_path, 'JPEG', quality=85)
            saved_images.append(img_path)

    return saved_images

def images_to_pdf(images_folder, output_pdf):
    """
    Combine the images in a folder into one multi-page PDF.

    images_folder: directory searched (non-recursively) for files ending in
        .png, .jpg, .jpeg, .bmp, .tif or .tiff (case-insensitive).
    output_pdf: path of the PDF to write.

    Pages follow the sorted file names. RGBA and LA images are converted to
    RGB before saving. Returns False without writing anything when the
    folder holds no matching image, True otherwise.
    """
    import os
    from contextlib import ExitStack

    extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tif', '.tiff')
    image_files = sorted(
        f for f in os.listdir(images_folder) if f.lower().endswith(extensions)
    )

    if not image_files:
        return False

    from PIL import Image

    # The ExitStack closes every opened image file once the PDF is written
    with ExitStack() as opened:
        images = []
        for img_file in image_files:
            img_path = os.path.join(images_folder, img_file)
            img = opened.enter_context(Image.open(img_path))
            if img.mode in ('RGBA', 'LA'):
                img = img.convert('RGB')
            images.append(img)

        images[0].save(output_pdf, save_all=True, append_images=images[1:])
    return True

# ---------------- Main Operation Function ----------------
def run_operation(b):
    with output_display:
        clear_output(wait=True)

        if not file_list.value:
            print("‚ùå Please select at least one PDF file!")
            return

        selected_files = list(file_list.value)
        current_folder = folder_path.value

        print(f"üìÅ Working in: {current_folder}")
        print(f"üìÑ Selected {len(selected_files)} file(s)")

        timestamp = int(time.time())

        if mode.value == "info":
            print("\nüìä PDF INFORMATION")
            print("-" * 40)
            for filename in selected_files:
                full_path = get_full_path(filename)
                info = get_pdf_info(full_path)
                for line in info:
                    print(line)
                print("-" * 40)

        elif mode.value == "merge":
            print("\nüîÑ MERGING PDFS")
            print("-" * 40)

            merger = PdfMerger()
            for filename in selected_files:
                full_path = get_full_path(filename)
                merger.append(full_path)
                print(f"‚úì Added: {filename}")

            output_name = f"merged_{timestamp}.pdf"
            output_path = os.path.join(current_folder, output_name)
            merger.write(output_path)
            merger.close()

            print(f"\n‚úÖ Merged {len(selected_files)} PDFs")
            print(f"üìÑ Output: {output_name}")
            print(f"üíæ Saved to: {current_folder}")

        elif mode.value == "split_all":
            print("\n‚úÇÔ∏è SPLITTING INTO SINGLE PAGES")
            print("-" * 40)

            for filename in selected_files:
                full_path = get_full_path(filename)
                with open(full_path, 'rb') as f:
                    pdf = PdfReader(f)

                    base_name = os.path.splitext(filename)[0]
                    for i in range(len(pdf.pages)):
                        writer = PdfWriter()
                        writer.add_page(pdf.pages[i])

                        output_name = f"{base_name}_page_{i+1:03d}.pdf"
                        output_path = os.path.join(current_folder, output_name)

                        with open(output_path, 'wb') as out_file:
                            writer.write(out_file)

                        print(f"‚úì Created: {output_name}")

        elif mode.value == "split_ranges":
            print("\nüî¢ SPLITTING BY PAGE RANGES")
            print("-" * 40)

            for filename in selected_files:
                full_path = get_full_path(filename)
                with open(full_path, 'rb') as f:
                    pdf = PdfReader(f)
                    total_pages = len(pdf.pages)

                    pages = parse_ranges(ranges_text.value, total_pages)

                    base_name = os.path.splitext(filename)[0]
                    output_name = f"{base_name}_extract_{timestamp}.pdf"
                    output_path = os.path.join(current_folder, output_name)

                    writer = PdfWriter()
                    for page_num in pages:
                        writer.add_page(pdf.pages[page_num])

                    with open(output_path, 'wb') as out_file:
                        writer.write(out_file)

                    print(f"‚úì Created: {output_name}")
                    print(f"  Pages extracted: {len(pages)}/{total_pages}")

        elif mode.value == "extract":
            print("\nüìë EXTRACTING/REORDERING PAGES")
            print("-" * 40)

            for filename in selected_files:
                full_path = get_full_path(filename)
                with open(full_path, 'rb') as f:
                    pdf = PdfReader(f)
                    total_pages = len(pdf.pages)

                    pages = parse_ranges(ranges_text.value, total_pages)

                    base_name = os.path.splitext(filename)[0]
                    output_name = f"{base_name}_reordered_{timestamp}.pdf"
                    output_path = os.path.join(current_folder, output_name)

                    writer = PdfWriter()
                    for page_num in pages:
                        writer.add_page(pdf.pages[page_num])

                    with open(output_path, 'wb') as out_file:
                        writer.write(out_file)

                    print(f"‚úì Created: {output_name}")
                    print(f"  Pages in new order: {len(pages)}")

        elif mode.value == "compress":
            print("\nüóúÔ∏è COMPRESSING PDF")
            print("-" * 40)

            for filename in selected_files:
                full_path = get_full_path(filename)
                original_size = os.path.getsize(full_path) / (1024 * 1024)

                base_name = os.path.splitext(filename)[0]

                if compress_mode.value == "size":
                    target_mb = target_size.value
                    output_name = f"{base_name}_compressed_{target_mb}MB_{timestamp}.pdf"
                    output_path = os.path.join(current_folder, output_name)

                    success, final_size = compress_pdf_size_based(full_path, output_path, target_mb)

                    if success:
                        reduction = ((original_size - final_size) / original_size) * 100
                        print(f"‚úì {filename}")
                        print(f"  Original: {original_size:.2f}MB ‚Üí Final: {final_size:.2f}MB")
                        print(f"  Reduction: {reduction:.1f}%")
                    else:
                        print(f"‚ö†Ô∏è {filename} - Could not reach target size")
                        print(f"  Best achieved: {final_size:.2f}MB (target: {target_mb}MB)")

                elif compress_mode.value == "quality":
                    quality = quality_level.value
                    output_name = f"{base_name}_q{quality}_{timestamp}.pdf"
                    output_path = os.path.join(current_folder, output_name)

                    compress_pdf_quality_based(full_path, output_path, quality)
                    final_size = os.path.getsize(output_path) / (1024 * 1024)
                    reduction = ((original_size - final_size) / original_size) * 100

                    print(f"‚úì {filename}")
                    print(f"  Quality: {quality}%")
                    print(f"  Original: {original_size:.2f}MB ‚Üí Final: {final_size:.2f}MB")
                    print(f"  Reduction: {reduction:.1f}%")

                else:  # aggressive
                    output_name = f"{base_name}_max_compressed_{timestamp}.pdf"
                    output_path = os.path.join(current_folder, output_name)

                    with pikepdf.open(full_path) as pdf:
                        pdf.save(
                            output_path,
                            compress_streams=True,
                            object_stream_mode=pikepdf.ObjectStreamMode.compress,
                            stream_decode_level=pikepdf.StreamDecodeLevel.specialized
                        )

                    final_size = os.path.getsize(output_path) / (1024 * 1024)
                    reduction = ((original_size - final_size) / original_size) * 100

                    print(f"‚úì {filename}")
                    print(f"  Max compression applied")
                    print(f"  Original: {original_size:.2f}MB ‚Üí Final: {final_size:.2f}MB")
                    print(f"  Reduction: {reduction:.1f}%")

        elif mode.value == "to_images":
            print("\nüñºÔ∏è CONVERTING PDF TO IMAGES")
            print("-" * 40)

            for filename in selected_files:
                full_path = get_full_path(filename)
                base_name = os.path.splitext(filename)[0]
                output_folder = os.path.join(current_folder, f"{base_name}_images_{timestamp}")

                with open(full_path, 'rb') as f:
                    pdf = PdfReader(f)
                    total_pages = len(pdf.pages)
                    pages = parse_ranges(ranges_text.value, total_pages)

                # Note: pdf2image requires poppler
                try:
                    !apt-get install -y poppler-utils
                    from pdf2image import convert_from_path

                    images = convert_from_path(full_path, dpi=dpi_setting.value)

                    os.makedirs(output_folder, exist_ok=True)

                    for i, image in enumerate(images):
                        if not pages or i in pages:
                            img_path = os.path.join(output_folder, f"page_{i+1:03d}.jpg")
                            image.save(img_path, 'JPEG', quality=90)
                            print(f"‚úì Created: {os.path.basename(img_path)}")

                    print(f"\nüìÅ Images saved to: {output_folder}")

                except ImportError:
                    print("Installing pdf2image...")
                    !pip install -q pdf2image
                    from pdf2image import convert_from_path

                    images = convert_from_path(full_path, dpi=dpi_setting.value)

                    os.makedirs(output_folder, exist_ok=True)

                    for i, image in enumerate(images):
                        if not pages or i in pages:
                            img_path = os.path.join(output_folder, f"page_{i+1:03d}.jpg")
                            image.save(img_path, 'JPEG', quality=90)
                            print(f"‚úì Created: {os.path.basename(img_path)}")

                    print(f"\nüìÅ Images saved to: {output_folder}")

        elif mode.value == "images_to_pdf":
            print("\nüìÑ CONVERTING IMAGES TO PDF")
            print("-" * 40)

            # Assume selected files are image folders
            for folder_name in selected_files:
                folder_path_full = get_full_path(folder_name)
                if os.path.isdir(folder_path_full):
                    output_name = f"{folder_name}_converted_{timestamp}.pdf"
                    output_path = os.path.join(current_folder, output_name)

                    if images_to_pdf(folder_path_full, output_path):
                        print(f"‚úì Created PDF from: {folder_name}")
                        print(f"  Output: {output_name}")
                    else:
                        print(f"‚ùå No images found in: {folder_name}")

        print("\n" + "="*60)
        print("‚úÖ Operation completed!")
        print(f"üìÅ Files are saved in: {current_folder}")
        print("="*60)

# Clicking the run button invokes run_operation
run_btn.on_click(run_operation)

# ---------------- Instructions ----------------
# A single print call emits the banner, the usage guide and the closing rule,
# one per line.
print(
    "\n" + "=" * 60,
    "üìñ HOW TO USE THIS TOOL",
    "=" * 60,
    """
1. üìÅ FOLDER SELECTION:
   ‚Ä¢ Navigate to your desired Google Drive folder using the path input
   ‚Ä¢ Click 'List Files' to see all PDFs in that folder
   ‚Ä¢ Select one or more PDFs from the list

2. üõ†Ô∏è AVAILABLE OPERATIONS:
   ‚Ä¢ üìÑ View PDF Info: See details about selected PDFs
   ‚Ä¢ üîÑ Merge PDFs: Combine multiple PDFs into one
   ‚Ä¢ ‚úÇÔ∏è Split PDF: Split into individual pages
   ‚Ä¢ üî¢ Split by Ranges: Extract specific page ranges
   ‚Ä¢ üìë Extract/Reorder: Pick pages in any order
   ‚Ä¢ üóúÔ∏è Compress PDF: Reduce file size with different methods
   ‚Ä¢ üñºÔ∏è To Images: Convert PDF pages to JPG images
   ‚Ä¢ üìÑ Images to PDF: Combine images into a PDF

3. ‚öôÔ∏è COMPRESSION OPTIONS:
   ‚Ä¢ Size-based: Target a specific file size (in MB)
   ‚Ä¢ Quality-based: Set compression quality (0-100%)
   ‚Ä¢ Aggressive: Maximum compression (lower quality)

4. üíæ OUTPUT:
   ‚Ä¢ All output files are saved in the SAME Google Drive folder
   ‚Ä¢ Files are timestamped to avoid overwriting
   ‚Ä¢ Check your Google Drive folder for the results

5. üìù PAGE RANGES FORMAT:
   ‚Ä¢ Single pages: 1,3,5
   ‚Ä¢ Ranges: 1-5,10-15
   ‚Ä¢ Mixed: 1-3,5,8-10
   ‚Ä¢ Leave empty for all pages

TIP: Use 'View PDF Info' first to check page count before splitting!
""",
    "=" * 60,
    sep="\n",
)

# Initial refresh (called with None in place of a button event)
refresh_files(None)
# ---------------------- CODE END ----------------------

W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
Extracting templates from packages: 100%
(Reading database ... 117540 files and directories currently installed.)
Preparing to unpack .../00-libavahi-client3_0.8-5ubuntu5.4_amd64.deb ...
Unpacking libavahi-client3:amd64 (0.8-5ubuntu5.4) over (0.8-5ubuntu5.3) ...
Preparing to unpack .../01-libavahi-common3_0.8-5ubuntu5.4_amd64.deb ...
Unpacking libavahi-common3:amd64 (0.8-5ubuntu5.4) over (0.8-5ubuntu5.3) ...
Preparing to unpack .../02-libavahi-common-data_0.8-5ubuntu5.4_amd64.deb ...
Unpacking libavahi-common-data:amd64 (0.8-5ubuntu5.4) over (0.8-5ubuntu5.3) ...
Selecting previously unselected package libavahi-core7:amd64.
Preparing to unpack .../03-libavahi-core7_0.8-5ubuntu5.4_amd64.deb ...
Unpacking libavahi-core7:amd64 (0.8-5ubuntu5.4) ...
Selecting previously unselected package libdaemon0:amd64.

Text(value='/content/drive/MyDrive/', description='Path:', layout=Layout(width='80%'))

HBox(children=(Button(button_style='info', description='📂 List Files', layout=Layout(width='20%'), style=Butto…

SelectMultiple(description='PDF Files:', layout=Layout(height='200px', width='100%'), options=(), value=())

Output()


📋 PDF OPERATION SELECTION


VBox(children=(HBox(children=(Dropdown(description='Action:', layout=Layout(width='50%'), options=(('📄 View PD…


📖 HOW TO USE THIS TOOL

1. 📁 FOLDER SELECTION:
   • Navigate to your desired Google Drive folder using the path input
   • Click 'List Files' to see all PDFs in that folder
   • Select one or more PDFs from the list

2. 🛠️ AVAILABLE OPERATIONS:
   • 📄 View PDF Info: See details about selected PDFs
   • 🔄 Merge PDFs: Combine multiple PDFs into one
   • ✂️ Split PDF: Split into individual pages
   • 🔢 Split by Ranges: Extract specific page ranges
   • 📑 Extract/Reorder: Pick pages in any order
   • 🗜️ Compress PDF: Reduce file size with different methods
   • 🖼️ To Images: Convert PDF pages to JPG images
   • 📄 Images to PDF: Combine images into a PDF

3. ⚙️ COMPRESSION OPTIONS:
   • Size-based: Target a specific file size (in MB)
   • Quality-based: Set compression quality (0-100%)
   • Aggressive: Maximum compression (lower quality)

4. 💾 OUTPUT:
   • All output files are saved in the SAME Google Drive folder