In [1]:
import os
import subprocess
import sys

def doc_to_pdf(doc_path, output_dir, current_count, total_files):
    """
    Converts a Word document to PDF using LibreOffice command-line tools.

    The document is converted into a ``_temp`` sub-directory of ``output_dir``
    and the resulting PDF is then moved into ``output_dir``, named after the
    source document (same base name and letter case, ``.pdf`` extension).
    A PDF that already exists under that name is overwritten.

    Parameters:
        doc_path (str): The path to the input .doc or .docx file.
        output_dir (str): The directory where the output PDF will be saved.
        current_count (int): Current number of successful conversions
        total_files (int): Total number of files to process

    Returns:
        bool: True when the PDF was produced and moved into place; False when
        LibreOffice exited with a non-zero status or any other error occurred.
        Errors are printed, never raised.
    """
    source_name = os.path.basename(doc_path)
    temp_dir = os.path.join(output_dir, "_temp")
    try:
        # Get the original filename without extension
        original_filename = os.path.splitext(source_name)[0]

        # Create a temporary directory for conversion
        os.makedirs(temp_dir, exist_ok=True)

        # For Windows, you'll need to specify the full path to soffice.exe
        if os.name == 'nt':  # Windows
            soffice_path = r"C:\Program Files\LibreOffice\program\soffice.exe"
        else:  # Linux/Mac
            soffice_path = 'soffice'

        # Convert to PDF in temporary directory
        subprocess.check_call([
            soffice_path,
            '--headless',
            '--convert-to', 'pdf',
            '--outdir', temp_dir,
            doc_path
        ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

        # LibreOffice names the output after the source base name. Look for
        # that exact name first; looking only for an all-lowercase name (as
        # this function used to) resolves on case-insensitive filesystems
        # but fails on case-sensitive ones for any name with capitals.
        pdf_name = original_filename + '.pdf'
        temp_pdf = os.path.join(temp_dir, pdf_name)
        if not os.path.exists(temp_pdf):
            # Fall back to a case-insensitive match in case the converter
            # changed the letter case of the name.
            wanted = pdf_name.lower()
            matches = [entry for entry in os.listdir(temp_dir)
                       if entry.lower() == wanted]
            if not matches:
                # soffice can exit with status 0 without writing anything.
                raise FileNotFoundError(
                    f"LibreOffice did not produce a PDF for {source_name}")
            temp_pdf = os.path.join(temp_dir, matches[0])

        # Move to final destination with original case
        final_pdf = os.path.join(output_dir, pdf_name)
        os.replace(temp_pdf, final_pdf)

        print(f"[{current_count + 1}/{total_files}] Successfully converted: {source_name}")
        return True
    except subprocess.CalledProcessError as e:
        print(f"Failed to convert {source_name}: {e}")
        return False
    except Exception as e:
        print(f"Error processing {source_name}: {e}")
        return False
    finally:
        # Remove the temporary directory on success *and* failure so a failed
        # conversion does not leave an empty "_temp" folder behind.
        try:
            os.rmdir(temp_dir)
        except OSError:
            pass  # Directory not empty, never created, or already deleted

def batch_convert_docs(folder_path, output_dir):
    """
    Converts all .doc and .docx files in a folder to PDF using LibreOffice,
    and stores the PDFs in the specified output directory.

    The folder is searched recursively. Every PDF is written directly into
    ``output_dir`` (the sub-folder layout is not reproduced). Files whose
    name starts with ``~$`` are skipped: Microsoft Word creates such
    owner/lock files next to an open document; they carry a .doc/.docx
    extension but are not convertible documents.

    Progress lines and a final summary are printed to stdout.

    Parameters:
        folder_path (str): The path to the folder containing Word documents.
        output_dir (str): The directory where the output PDFs will be saved.

    Raises:
        FileNotFoundError: If ``folder_path`` does not exist.
    """
    # Ensure paths exist
    if not os.path.exists(folder_path):
        raise FileNotFoundError(f"Input folder not found: {folder_path}")

    # Create the output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    print(f"Output directory ready: {output_dir}")

    # Build the work list once so the announced total always matches the
    # files that are actually processed (the tree is not walked twice).
    doc_paths = [
        os.path.join(root, filename)
        for root, _, files in os.walk(folder_path)
        for filename in files
        if filename.lower().endswith(('.doc', '.docx'))
        and not filename.startswith('~$')  # Word owner/lock file, not a document
    ]
    total_files = len(doc_paths)

    print(f"\nFound {total_files} documents to convert")
    print("Starting conversion process...\n")

    # Track conversion statistics
    successful_conversions = 0
    failed_conversions = 0

    # Process all files
    for doc_path in doc_paths:
        if doc_to_pdf(doc_path, output_dir, successful_conversions, total_files):
            successful_conversions += 1
        else:
            failed_conversions += 1

    # Print summary
    print("\nConversion Summary:")
    print(f"Total files found: {total_files}")
    print(f"Successfully converted: {successful_conversions}")
    print(f"Failed conversions: {failed_conversions}")

    # Calculate success rate (guarded so an empty folder does not divide by zero)
    if total_files > 0:
        success_rate = (successful_conversions / total_files) * 100
        print(f"Success rate: {success_rate:.1f}%")

if __name__ == "__main__":
    # Script entry point: convert every Word document under folder_path
    # into a PDF stored in output_dir.

    # Define your local paths here
    folder_path = r"F:\orig\CT_doc_files" # Change this
    output_dir = r"F:\orig\CT_doc_files_to_pdf"  # Change this

    # Verify the input folder first, so a wrong input path does not leave a
    # freshly created (and empty) output directory behind.
    if not os.path.exists(folder_path):
        print(f"Error: Input folder not found: {folder_path}")
        sys.exit(1)

    os.makedirs(output_dir, exist_ok=True)

    print("Starting conversion process...")
    print(f"Input folder: {folder_path}")
    print(f"Output folder: {output_dir}")

    # Report any unexpected error as a single line instead of a traceback.
    try:
        batch_convert_docs(folder_path, output_dir)
    except Exception as e:
        print(f"An error occurred: {e}")

Starting conversion process...
Input folder: F:\orig\CT_doc_files
Output folder: F:\orig\CT_doc_files_to_pdf
Output directory ready: F:\orig\CT_doc_files_to_pdf

Found 1718 documents to convert
Starting conversion process...

[1/1718] Successfully converted: ! Vilas 60 HƯỚNG DẪN CHI TIẾT ĐO KIỂM.docx
[2/1718] Successfully converted: !!! Hop chuan, hop quy, do kiem.docx
[3/1718] Successfully converted: !!! Note.docx
[4/1718] Successfully converted: #10_HD VMPC-VECTOR 20231003 (FINAL).doc
[5/1718] Successfully converted: (RV1)HỢP ĐỒNG AMIVN- VECTOR cmt.docx
[6/1718] Successfully converted: 0. Bang ke chung tu thuoc Phu luc III.doc
[7/1718] Successfully converted: 0. Phụ lục HD 03_V2_Vector cmt.doc
[8/1718] Successfully converted: 01.PLDC.docx
[9/1718] Successfully converted: 0110-HĐMB-KTV-VIVN-2024 - cmt.doc
[10/1718] Successfully converted: 01_Purchase Contract_Terms  Conditions_G2.docx
[11/1718] Successfully converted: 02_Exhibit I_Schedule of Rates  Prices_G2.docx
[12/1718] Successful

: 