<a href="https://colab.research.google.com/github/alirzaMhd/utilities/blob/master/ppt_to_pdf.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# @title 1. Install Dependencies (Run this once)
# LibreOffice performs the PPT/DOC -> PDF conversion used by the converter cell.
!echo "Installing LibreOffice... (this may take a minute)"
!apt-get -q update
# -y answers apt's confirmation prompt up front: stdout is sent to /dev/null,
# so a prompt would otherwise be invisible. The success message is chained
# with && so it is only printed when the install actually succeeded.
!apt-get -q -y install libreoffice > /dev/null && echo "LibreOffice installed."

!echo "Installing Python PDF libraries..."
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install -q pypdf reportlab
!echo "Done."

Installing LibreOffice... (this may take a minute)
Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
Get:2 https://cli.github.com/packages stable InRelease [3,917 B]
Get:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:5 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Get:6 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ Packages [83.8 kB]
Get:7 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:8 https://cli.github.com/packages stable/main amd64 Packages [356 B]
Hit:9 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Get:10 https://r2u.stat.illinois.edu/ubuntu jammy/main all Packages [9,694 kB]
Get:11 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Get:12 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease [24.6 kB]
Get:13 http://security.ubuntu.com/ubuntu jammy-securit

In [None]:
import os
import subprocess
import glob
from pypdf import PdfWriter, PdfReader
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
import io

# @title 2. Run the Converter Application

def find_office_files(directory='/content/'):
    """Return the PowerPoint and Word files found directly inside ``directory``.

    Matching is done on the file extension, case-insensitively, so
    ``SLIDES.PPTX`` is found as well as ``slides.pptx``. Sub-directories are
    not searched, and entries that are not regular files are ignored.

    Args:
        directory: Folder to scan. It is taken literally (characters such as
            ``[`` or ``*`` in the path are not treated as wildcards).

    Returns:
        A sorted list of paths, each built as ``os.path.join(directory, name)``.
        An empty list is returned when ``directory`` does not exist.
    """
    # Extensions to look for (compared against the lower-cased file name)
    extensions = ('.pptx', '.ppt', '.docx', '.doc')

    if not os.path.isdir(directory):
        return []

    matches = []
    for name in os.listdir(directory):
        # Hidden files (leading dot) are skipped, as a "*.ext" glob would do.
        if name.startswith('.'):
            continue
        if not name.lower().endswith(extensions):
            continue
        path = os.path.join(directory, name)
        # A folder that merely happens to be called "something.doc" is not a document.
        if os.path.isfile(path):
            matches.append(path)
    return sorted(matches)

def create_marker_page(text):
    """Build a one-page, letter-sized separator PDF in memory.

    The page shows ``text`` centred on a light grey horizontal banner in the
    middle of the page. The font starts at 24pt and is scaled down (to no
    less than 8pt) when the text would otherwise be wider than the page, so
    long file names are not cut off at the page edges.

    Args:
        text: The caption to draw on the banner.

    Returns:
        A ``PdfReader`` over the generated PDF; the marker is ``.pages[0]``.
    """
    font_name = "Helvetica-Bold"
    max_font_size = 24
    min_font_size = 8
    side_margin = 36  # half an inch of clear space on each side

    packet = io.BytesIO()
    # Create a new PDF with Reportlab
    can = canvas.Canvas(packet, pagesize=letter)
    width, height = letter

    # Draw a background rectangle/banner
    can.setFillColorRGB(0.9, 0.9, 0.9)  # Light grey
    can.rect(0, height / 2 - 50, width, 100, fill=1, stroke=0)

    # Pick the largest font size (up to max_font_size) at which the text fits.
    font_size = max_font_size
    text_width = can.stringWidth(text, font_name, font_size)
    available_width = width - 2 * side_margin
    if text_width > available_width:
        # String width scales linearly with the font size.
        font_size = max(min_font_size, font_size * available_width / text_width)
        text_width = can.stringWidth(text, font_name, font_size)

    # Draw text
    # NOTE(review): Helvetica-Bold is a built-in Latin font; captions containing
    # non-Latin file names (e.g. Persian) presumably will not render correctly.
    # Fixing that needs an embedded TrueType font - confirm and follow up.
    can.setFillColorRGB(0, 0, 0)  # Black
    can.setFont(font_name, font_size)
    x = max(0, (width - text_width) / 2)
    # Keep the baseline offset proportional to the font (10pt below centre at 24pt).
    y = height / 2 - 10 * font_size / max_font_size
    can.drawString(x, y, text)

    can.save()
    packet.seek(0)
    return PdfReader(packet)

def convert_to_pdf(input_path, output_folder='/content/temp_conversion'):
    """Convert one PPT/DOC file to PDF with headless LibreOffice.

    Args:
        input_path: Path of the document to convert.
        output_folder: Folder that receives the PDF; created if missing.

    Returns:
        The path of the generated PDF (``<output_folder>/<input stem>.pdf``),
        or ``None`` when LibreOffice could not be started, exited with an
        error, or did not produce the expected file. A short explanation is
        printed in each failure case.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    # LibreOffice headless command
    cmd = [
        'libreoffice', '--headless', '--convert-to', 'pdf',
        '--outdir', output_folder, input_path
    ]

    try:
        # stderr is captured (not discarded) so it can be shown on failure.
        subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as exc:
        print(f"Error: Failed to convert {input_path}")
        detail = (exc.stderr or b"").decode("utf-8", errors="replace").strip()
        if detail:
            print(detail)
        return None
    except OSError as exc:
        # Raised when the executable itself cannot be launched, e.g. it is not installed.
        print(f"Error: Could not run LibreOffice ({exc}). Has the install cell been run?")
        return None

    # Determine the expected output filename
    base_name = os.path.basename(input_path)
    name_no_ext = os.path.splitext(base_name)[0]
    pdf_path = os.path.join(output_folder, name_no_ext + ".pdf")

    if os.path.exists(pdf_path):
        return pdf_path

    print(f"Error: Could not find converted PDF for {base_name}")
    return None

def _parse_selection(user_input, file_count):
    """Split a comma-separated index string into (valid indexes, rejected tokens)."""
    chosen = []
    rejected = []
    for token in user_input.split(','):
        token = token.strip()
        if not token:
            continue
        try:
            index = int(token) if token.isdigit() else -1
        except ValueError:
            # isdigit() accepts a few characters (e.g. superscripts) that int() rejects.
            index = -1
        if 0 <= index < file_count:
            chosen.append(index)
        else:
            rejected.append(token)
    return chosen, rejected


def main():
    """Interactively merge Office files from /content/ into one PDF binder.

    Steps:
      1. List the PPT/Word files found in /content/ and ask the user for a
         comma-separated list of indexes giving the desired order.
      2. Convert each selected file to PDF and append it to the binder,
         wrapped between a "START OF" and an "END OF" marker page.
      3. Write the result to /content/Final_Combined_Binder.pdf.

    Entries that are not valid indexes are reported and ignored. Files that
    fail to convert are skipped and listed at the end; if no file converts,
    no output is written. The temporary conversion folder is removed even
    when an error interrupts processing.
    """
    import shutil

    print("Scanning /content/ for PPT and Word files...")
    files = find_office_files()

    if not files:
        print("No .ppt, .pptx, .doc, or .docx files found in /content/.")
        print("Please upload your files to the files tab on the left.")
        return

    # 1. User Selection and Ordering
    print(f"\nFound {len(files)} files:")
    for i, f in enumerate(files):
        print(f"[{i}] {os.path.basename(f)}")

    print("\n------------------------------------------------")
    print("Enter the indexes of the files in the order you want them.")
    print("Separate numbers with commas (e.g., '2, 0, 1').")
    user_input = input("Order: ")

    selected_indices, rejected = _parse_selection(user_input, len(files))
    if rejected:
        # Tell the user instead of silently dropping what they typed.
        print(f"Ignoring invalid entries: {', '.join(rejected)}")
    ordered_files = [files[i] for i in selected_indices]

    if not ordered_files:
        print("No valid files selected.")
        return

    # 2. Processing
    merger = PdfWriter()
    temp_folder = '/content/temp_conversion'
    output_filename = '/content/Final_Combined_Binder.pdf'
    converted_count = 0
    failed = []

    print("\nProcessing files...")

    try:
        for f_path in ordered_files:
            filename = os.path.basename(f_path)
            print(f"Converting: {filename} ...")

            # A. Convert file to PDF
            converted_pdf_path = convert_to_pdf(f_path, temp_folder)
            if not converted_pdf_path:
                failed.append(filename)
                continue

            # B. Create Start Marker
            start_marker = create_marker_page(f"START OF: {filename}")
            merger.add_page(start_marker.pages[0])

            # C. Add the converted content
            reader = PdfReader(converted_pdf_path)
            for page in reader.pages:
                merger.add_page(page)

            # D. Create End Marker
            end_marker = create_marker_page(f"END OF: {filename}")
            merger.add_page(end_marker.pages[0])
            converted_count += 1

        if converted_count == 0:
            # Do not write an empty PDF and claim success.
            print("\n------------------------------------------------")
            print("No files could be converted; nothing was written.")
            return

        # 3. Saving
        with open(output_filename, 'wb') as f_out:
            merger.write(f_out)
    finally:
        # Cleanup temp folder, also when an error interrupts processing
        shutil.rmtree(temp_folder, ignore_errors=True)

    print("\n------------------------------------------------")
    print("SUCCESS!")
    print(f"All files combined into: {output_filename}")
    if failed:
        print(f"Warning: {len(failed)} file(s) could not be converted and were skipped:")
        for name in failed:
            print(f"  - {name}")
    print("Download it from the files tab on the left.")

# Entry point: start the interactive converter when this code is executed
# directly (e.g. by running the cell); nothing runs if it is imported as a module.
if __name__ == "__main__":
    main()

Scanning /content/ for PPT and Word files...

Found 12 files:
[0] Alkaline phosphatase.pptx
[1] ENZYME .pptx
[2] Nucleotide  and roles.ppt
[3] Plasma Proteins1.ppt
[4] cholesterol and triglyceride.pptx
[5] membrane.ppt
[6] امینواسیدها ساختار و متابولیسم.pptx
[7] لیپید (1).pptx
[8] مقدمه،_طبقه_بندی،_رسپتور_و_مکانیسم_عمل.pptx
[9] کاتابوليسم_و_بيوسنتز_اسيدهاي_چرب.pptx
[10] کربوهیدرات (ساختار).pptx
[11] کروماتوگرافی with voice(1).pptx

------------------------------------------------
Enter the indexes of the files in the order you want them.
Separate numbers with commas (e.g., '2, 0, 1').
Order: 10,7,9,6,3,2,5,1,8,4,0,11

Processing files...
Converting: کربوهیدرات (ساختار).pptx ...
Converting: لیپید (1).pptx ...
Converting: کاتابوليسم_و_بيوسنتز_اسيدهاي_چرب.pptx ...
Converting: امینواسیدها ساختار و متابولیسم.pptx ...
Converting: Plasma Proteins1.ppt ...
Converting: Nucleotide  and roles.ppt ...
Converting: membrane.ppt ...
Converting: ENZYME .pptx ...
Converting: مقدمه،_طبقه_بندی،_رسپتور_و_