In [1]:
import os
import re
import sys
from pathlib import Path

# pip install pywin32
import win32com.client as win32  # type: ignore

In [2]:
# Lower-case file suffixes (leading dot included) that the converter accepts.
SUPPORTED_EXTS = {
    ".docx",
    ".pptx",
    ".xlsx",
    ".xlsm",
    ".csv",
}

# Raw integer values of the Office enumerations used by the exporters.
# Spelling them out here means the makepy-generated constants are not needed.
XL_SHEET_VISIBLE = -1  # xlSheetVisible (hidden = 0, veryHidden = 2)
XL_TYPE_PDF = 0        # xlTypePDF
WD_EXPORT_PDF = 17     # wdExportFormatPDF
PP_SAVE_AS_PDF = 32    # ppSaveAsPDF


In [3]:
def sanitize_filename_part(name: str, max_len: int = 80) -> str:
    """Turn an arbitrary label (e.g. a sheet name) into a safe file-name fragment.

    The characters ``< > : " / | ? *``, the backslash and the control
    characters 0x00-0x1F are each replaced with ``_``. Whitespace and dots are
    then removed from both ends, the result is cut to ``max_len`` characters,
    and any whitespace or dots exposed at the end by that cut are removed too.

    Args:
        name: Raw text to clean.
        max_len: Maximum length of the returned fragment.

    Returns:
        The cleaned fragment, or ``"Sheet"`` when nothing usable is left.
    """
    cleaned = re.sub(r'[<>:"/\\|?*\x00-\x1F]', "_", name)

    # strip() stops at a dot and strip(".") stops at a space, so a mix such as
    # "a ." needs repeated passes until neither call changes anything.
    previous = None
    while cleaned != previous:
        previous = cleaned
        cleaned = cleaned.strip().strip(".")

    if len(cleaned) > max_len:
        cleaned = cleaned[:max_len]
        # Cutting can leave a space or dot at the new end ("abc. def" -> "abc.").
        previous = None
        while cleaned != previous:
            previous = cleaned
            cleaned = cleaned.rstrip().rstrip(".")

    return cleaned or "Sheet"

In [4]:
class OfficeApps:
    """Lazily created Word / Excel / PowerPoint COM application objects.

    Late-binding (DispatchEx) so makepy is NOT required.

    Each application is created on the first call to its ``get_*`` method and
    reused afterwards. ``close()`` quits whatever was created and resets the
    holder, so the same ``OfficeApps`` object can be used again.
    """

    def __init__(self):
        # Each slot stays None until the matching get_*() is first called.
        self.word = None
        self.excel = None
        self.ppt = None

    def get_word(self):
        """Return the Word application, creating it hidden on first use."""
        if self.word is None:
            self.word = win32.DispatchEx("Word.Application")  # late-binding
            self.word.Visible = False
        return self.word

    def get_excel(self):
        """Return the Excel application, creating it hidden with alerts off on first use."""
        if self.excel is None:
            self.excel = win32.DispatchEx("Excel.Application")  # late-binding
            self.excel.Visible = False
            self.excel.DisplayAlerts = False
        return self.excel

    def get_ppt(self):
        """Return the PowerPoint application, creating it on first use."""
        if self.ppt is None:
            self.ppt = win32.DispatchEx("PowerPoint.Application")  # late-binding
            # No window shown by default
        return self.ppt

    def close(self):
        """Quit every application that was created and forget its reference.

        Best effort: a failing ``Quit()`` is reported on stderr and does not
        stop the remaining applications from being closed. Calling ``close()``
        again is a no-op.
        """
        for attr in ("word", "excel", "ppt"):
            inst = getattr(self, attr, None)
            if inst is None:
                continue
            # Drop the reference before quitting so a later get_*() creates a
            # fresh application instead of handing back one that was quit.
            setattr(self, attr, None)
            try:
                inst.Quit()
            except Exception as exc:
                print(f"WARN -> could not quit {attr}: {exc}", file=sys.stderr)

In [5]:
def export_word_to_pdf(word_app, src: Path, dst_pdf: Path):
    """Open ``src`` in Word and export it to ``dst_pdf`` as a PDF.

    Args:
        word_app: A Word ``Application`` COM object.
        src: Document to convert.
        dst_pdf: Path of the PDF to write.

    Raises:
        Whatever the COM calls raise; the document is closed without saving
        either way.
    """
    # Hand Word absolute paths: it is a separate process, so a relative path
    # would not necessarily be resolved against Python's working directory.
    src_path = str(Path(src).absolute())
    dst_path = str(Path(dst_pdf).absolute())

    doc = None
    try:
        # Read-only is enough for an export and avoids "file in use" prompts,
        # which nobody could answer on a hidden Word instance.
        doc = word_app.Documents.Open(src_path, ReadOnly=True)
        doc.ExportAsFixedFormat(dst_path, WD_EXPORT_PDF)
    finally:
        if doc is not None:
            doc.Close(False)  # False: discard changes


In [6]:
def export_ppt_to_pdf(ppt_app, src: Path, dst_pdf: Path):
    """Open ``src`` in PowerPoint without a window and save it to ``dst_pdf`` as a PDF.

    Args:
        ppt_app: A PowerPoint ``Application`` COM object.
        src: Presentation to convert.
        dst_pdf: Path of the PDF to write.

    Raises:
        Whatever the COM calls raise; the presentation is closed either way.
    """
    # Hand PowerPoint absolute paths: it is a separate process, so a relative
    # path would not necessarily be resolved against Python's working directory.
    src_path = str(Path(src).absolute())
    dst_path = str(Path(dst_pdf).absolute())

    pres = None
    try:
        pres = ppt_app.Presentations.Open(src_path, WithWindow=False)
        pres.SaveAs(dst_path, PP_SAVE_AS_PDF)
    finally:
        if pres is not None:
            pres.Close()


In [7]:
def _export_sheet_via_temp_workbook(excel_app, ws, target: Path):
    """Copy ``ws`` into its own temporary workbook and export that workbook to ``target``."""
    ws.Copy()   # creates a new 1-sheet workbook, which becomes the ActiveWorkbook
    tmp = excel_app.ActiveWorkbook
    try:
        tmp.ExportAsFixedFormat(XL_TYPE_PDF, str(target))
    finally:
        tmp.Close(False)  # throw the temporary copy away


def export_excel_sheet_pdfs(excel_app, src: Path, out_dir: Path):
    """
    Export the visible worksheets of ``src`` to PDF files in ``out_dir``.

    - No visible worksheet -> the whole workbook goes to <stem>.pdf
    - 1 visible sheet      -> <stem>.pdf
    - >1 visible sheets    -> one PDF per sheet: <stem>_<sheet>.pdf
    Uses a temp 1-sheet workbook per sheet to avoid COM flakiness.

    Sheet names are passed through ``sanitize_filename_part``. When two sheets
    end up with the same sanitised name, later ones get ``_2``, ``_3``, ...
    appended so that no PDF from the same workbook overwrites another.

    Args:
        excel_app: An Excel ``Application`` COM object.
        src: Workbook (or CSV) to convert.
        out_dir: Directory that receives the PDF(s); it is not created here.

    Raises:
        Whatever the COM calls raise; the source workbook is closed without
        saving either way.
    """
    # Hand Excel absolute paths: it is a separate process, so a relative path
    # would not necessarily be resolved against Python's working directory.
    src_path = Path(src).absolute()
    out_dir = Path(out_dir).absolute()

    wb = None
    try:
        # ReadOnly avoids prompts; UpdateLinks=0 skips link updates
        wb = excel_app.Workbooks.Open(str(src_path), UpdateLinks=0, ReadOnly=True)

        visible_sheets = [ws for ws in wb.Worksheets if ws.Visible == XL_SHEET_VISIBLE]

        # No visible sheets -> export whole workbook
        if not visible_sheets:
            wb.ExportAsFixedFormat(XL_TYPE_PDF, str(out_dir / f"{src_path.stem}.pdf"))
            return

        # Exactly one visible sheet -> export only that sheet, named after the file
        if len(visible_sheets) == 1:
            _export_sheet_via_temp_workbook(
                excel_app, visible_sheets[0], out_dir / f"{src_path.stem}.pdf"
            )
            return

        # Multiple visible sheets -> one PDF per sheet
        print(f"{src.name}: {len(visible_sheets)} visible sheets -> exporting each to its own PDF")
        used_names = set()  # case-folded, because Windows file names ignore case
        for ws in visible_sheets:
            base = sanitize_filename_part(ws.Name)
            sheet_name, counter = base, 2
            # Distinct sheet names can sanitise to the same text ("A<B" / "A>B").
            while sheet_name.casefold() in used_names:
                sheet_name = f"{base}_{counter}"
                counter += 1
            used_names.add(sheet_name.casefold())

            _export_sheet_via_temp_workbook(
                excel_app, ws, out_dir / f"{src_path.stem}_{sheet_name}.pdf"
            )

    finally:
        if wb is not None:
            wb.Close(False)


In [8]:
def convert_one(apps: OfficeApps, src: Path, out_dir: Path):
    out_dir.mkdir(parents=True, exist_ok=True)
    ext = src.suffix.lower()

    if ext == ".docx":
        export_word_to_pdf(apps.get_word(), src, out_dir / f"{src.stem}.pdf")
        return True

    if ext == ".pptx":
        export_ppt_to_pdf(apps.get_ppt(), src, out_dir / f"{src.stem}.pdf")
        return True

    if ext in {".xlsx", ".xlsm", ".csv"}:
        export_excel_sheet_pdfs(apps.get_excel(), src, out_dir)
        return True

    return False

In [9]:
def walk_and_convert(in_root: Path, out_root: Path):
    """Convert every supported file under ``in_root`` to PDF, mirroring the tree in ``out_root``.

    Folders and files are visited in sorted order so runs are repeatable.
    Files whose suffix is not in ``SUPPORTED_EXTS`` are ignored, and so are
    Office owner/lock files (names starting with ``~$``), which are not real
    documents. A failure on one file is recorded and reported; it does not
    stop the run. The Office applications are closed when the walk ends,
    whether or not it succeeded.

    Args:
        in_root: Root folder to scan recursively.
        out_root: Root folder for the PDFs; sub-folders follow ``in_root``.
    """
    apps = OfficeApps()
    converted, failed = 0, []

    # os.walk() yields nothing for a missing folder, which would otherwise
    # look exactly like "no convertible files found".
    if not Path(in_root).is_dir():
        print(f"WARN -> input folder not found: {in_root}", file=sys.stderr)

    try:
        for root, dirs, files in os.walk(in_root):
            dirs.sort()  # in-place sort fixes the order os.walk descends in
            for name in sorted(files):
                # Office drops "~$<name>" owner files next to open documents;
                # they carry the document's extension but cannot be opened.
                if name.startswith("~$"):
                    continue

                src = Path(root) / name
                if src.suffix.lower() not in SUPPORTED_EXTS:
                    continue

                rel_dir = Path(root).relative_to(in_root)
                dst_dir = out_root / rel_dir

                try:
                    ok = convert_one(apps, src, dst_dir)
                    if ok:
                        converted += 1
                        print(f"OK   -> {src}")
                    else:
                        print(f"SKIP -> {src}")
                except Exception as e:
                    # Keep going: one bad file must not abort the whole batch.
                    failed.append((str(src), str(e)))
                    print(f"FAIL -> {src} : {e}", file=sys.stderr)
    finally:
        apps.close()

    print(f"\nConverted files: {converted}")
    if failed:
        print("Failures:")
        for f, err in failed:
            print(f" - {f}\n   {err}")



In [10]:
if __name__ == "__main__":
    # Input and output folders live under the current user's Downloads
    # directory, so the notebook is not tied to one particular user profile.
    downloads = Path.home() / "Downloads"
    in_dir = (downloads / "InputFiles").resolve()
    out_dir = (downloads / "OutputPDFs").resolve()
    out_dir.mkdir(parents=True, exist_ok=True)
    walk_and_convert(in_dir, out_dir)


Converted files: 0
Failures:
 - C:\Users\Ashvini\Downloads\InputFiles\Node_Exporter_Metrics.csv
   (-2147352567, 'Exception occurred.', (0, None, None, None, 0, -2146827284), None)


FAIL -> C:\Users\Ashvini\Downloads\InputFiles\Node_Exporter_Metrics.csv : (-2147352567, 'Exception occurred.', (0, None, None, None, 0, -2146827284), None)
