In [41]:
import os
import re
import sys
from pathlib import Path

In [42]:
import win32com.client as win32  # type: ignore


In [43]:
# File suffixes the converter accepts, grouped by the Office app that opens them.
SUPPORTED_EXTS = {
    ".docx",                   # Word
    ".pptx",                   # PowerPoint
    ".xlsx", ".xlsm", ".csv",  # Excel
}

# Numeric Office constants passed to the COM export calls.
XL_SHEET_VISIBLE = -1  # sheet is visible (0 = hidden, 2 = very hidden)
XL_TYPE_PDF = 0        # format argument for Excel ExportAsFixedFormat
PP_SAVE_AS_PDF = 32    # format argument for PowerPoint SaveAs
WD_EXPORT_PDF = 17     # format argument for Word ExportAsFixedFormat

In [44]:
def sanitize_filename_part(name: str, max_len: int = 80) -> str:
    """Make a string safe to use as a component of a file name.

    Steps, in order:
      1. Replace the characters < > : " / | ? *, the backslash and ASCII
         control characters with "_".
      2. Remove every leading/trailing run of whitespace and dots.
      3. Cut the result to at most ``max_len`` characters and trim again, so
         the cut cannot leave a trailing dot or space behind.

    Args:
        name: Raw text, e.g. a worksheet name.
        max_len: Maximum length of the returned string.

    Returns:
        The cleaned text, or "Sheet" when nothing usable remains.
    """
    cleaned = re.sub(r'[<>:"/\\|?*\x00-\x1F]', "_", name)
    # Trim whitespace and dots together: stripping them one after the other
    # leaves "foo ." as "foo " and ". foo" as " foo".
    cleaned = re.sub(r"^[\s.]+|[\s.]+$", "", cleaned)
    if len(cleaned) > max_len:
        # The cut may land right after a dot or space; trim the tail again.
        cleaned = re.sub(r"[\s.]+$", "", cleaned[:max_len])
    return cleaned or "Sheet"

In [45]:
class OfficeApps:
    """Lazy-initialize Office applications and quit them cleanly at the end.

    Each ``get_*`` method starts its application on first use and returns the
    cached COM object afterwards. ``close()`` quits whatever was started and
    clears the cached handles, so the object can be reused afterwards. The
    class also works as a context manager: ``with OfficeApps() as apps: ...``
    calls ``close()`` on exit.
    """

    def __init__(self):
        # None means "not started yet"; filled in by the get_* methods.
        self.word = None
        self.excel = None
        self.ppt = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()
        return False  # never suppress an exception raised inside the block

    def get_word(self):
        """Return the Word application, starting it hidden on first use."""
        if self.word is None:
            self.word = win32.gencache.EnsureDispatch("Word.Application")
            self.word.Visible = False
        return self.word

    def get_excel(self):
        """Return the Excel application, starting it hidden with alerts off on first use."""
        if self.excel is None:
            self.excel = win32.gencache.EnsureDispatch("Excel.Application")
            self.excel.Visible = False
            self.excel.DisplayAlerts = False
        return self.excel

    def get_ppt(self):
        """Return the PowerPoint application, starting it on first use."""
        if self.ppt is None:
            self.ppt = win32.gencache.EnsureDispatch("PowerPoint.Application")
        return self.ppt

    def close(self):
        """Quit every started application (best effort) and forget its handle.

        A failing ``Quit()`` is reported on stderr instead of being raised, so
        one misbehaving application cannot prevent the others from closing.
        Calling ``close()`` more than once is safe.
        """
        for app in ("word", "excel", "ppt"):
            inst = getattr(self, app, None)
            if inst is None:
                continue
            # Drop the handle first: after Quit() it is unusable, and keeping
            # it would make the next get_*() hand out a dead application.
            setattr(self, app, None)
            try:
                inst.Quit()
            except Exception as exc:
                print(f"WARN -> could not quit {app}: {exc}", file=sys.stderr)

In [46]:
def export_word_to_pdf(word_app, src: Path, dst_pdf: Path):
    """Export one Word document to PDF.

    Args:
        word_app: Word application object (see ``OfficeApps.get_word``).
        src: Document to open.
        dst_pdf: Path of the PDF to write.

    The document is always closed without saving, even when the export fails.
    Any error raised by Word propagates to the caller.
    """
    # Word is a separate process, so a relative path would not be interpreted
    # against this Python process's working directory. Pass absolute paths.
    src_abs = os.path.abspath(str(src))
    dst_abs = os.path.abspath(str(dst_pdf))

    doc = None
    try:
        # Read-only: the source is never modified, same as the Excel exporter.
        doc = word_app.Documents.Open(src_abs, ReadOnly=True)
        doc.ExportAsFixedFormat(dst_abs, WD_EXPORT_PDF)
    finally:
        if doc is not None:
            doc.Close(False)  # False = do not save changes

In [47]:
def export_ppt_to_pdf(ppt_app, src: Path, dst_pdf: Path):
    """Save one PowerPoint presentation as PDF.

    Args:
        ppt_app: PowerPoint application object (see ``OfficeApps.get_ppt``).
        src: Presentation to open.
        dst_pdf: Path of the PDF to write.

    The presentation is opened without a window and is always closed again,
    even when saving fails. Any error raised by PowerPoint propagates.
    """
    # PowerPoint is a separate process, so a relative path would not be
    # interpreted against this Python process's working directory.
    src_abs = os.path.abspath(str(src))
    dst_abs = os.path.abspath(str(dst_pdf))

    pres = None
    try:
        pres = ppt_app.Presentations.Open(src_abs, WithWindow=False)
        pres.SaveAs(dst_abs, PP_SAVE_AS_PDF)
    finally:
        if pres is not None:
            pres.Close()

In [48]:
def export_excel_sheet_pdfs(excel_app, src: Path, out_dir: Path):
    """
    Export visible sheets from an Excel workbook (or CSV) to PDF:
    - If no visible worksheet -> export the whole workbook as "<stem>.pdf"
    - If 1 visible sheet -> export single PDF "<stem>.pdf"
    - If >1 visible sheets -> export one PDF per sheet "<stem>_<sheet>.pdf";
      when two sheet names sanitize to the same text, later ones get a
      numeric suffix ("_2", "_3", ...) so no PDF overwrites another.

    Args:
        excel_app: Excel application object (see ``OfficeApps.get_excel``).
        src: Workbook or CSV file to open.
        out_dir: Existing folder that receives the PDF(s).

    The workbook is always closed without saving. Any error raised by Excel
    propagates to the caller.
    """
    # Excel is a separate process, so relative paths would not be interpreted
    # against this Python process's working directory. Pass absolute paths.
    src_abs = os.path.abspath(str(src))
    out_abs = Path(os.path.abspath(str(out_dir)))
    stem = Path(src).stem

    wb = None
    try:
        # ReadOnly to avoid prompts; UpdateLinks=0 to skip link updates
        wb = excel_app.Workbooks.Open(src_abs, UpdateLinks=0, ReadOnly=True)
        sheets = [ws for ws in wb.Worksheets if ws.Visible == XL_SHEET_VISIBLE]

        # No visible sheets -> export whole workbook fallback
        if len(sheets) == 0:
            wb.ExportAsFixedFormat(XL_TYPE_PDF, str(out_abs / f"{stem}.pdf"))
            return

        if len(sheets) == 1:
            # Export the single visible sheet only
            sheets[0].ExportAsFixedFormat(XL_TYPE_PDF, str(out_abs / f"{stem}.pdf"))
            return

        # Multiple visible sheets: export each separately
        print(f"{src.name}: {len(sheets)} visible sheets -> exporting each to its own PDF")
        used = set()  # lower-cased labels already written for this workbook
        for ws in sheets:
            base = sanitize_filename_part(ws.Name)
            label, counter = base, 2
            # Different sheet names can sanitize to the same text ("A<B" and
            # "A>B" both become "A_B"); pick a free label instead of overwriting.
            while label.lower() in used:
                label = f"{base}_{counter}"
                counter += 1
            used.add(label.lower())
            ws.ExportAsFixedFormat(XL_TYPE_PDF, str(out_abs / f"{stem}_{label}.pdf"))

    finally:
        if wb is not None:
            wb.Close(False)  # False = do not save changes


In [49]:
def convert_one(apps: OfficeApps, src: Path, out_dir: Path):
    out_dir.mkdir(parents=True, exist_ok=True)
    ext = src.suffix.lower()

    if ext in {".docx"}:
        dst = out_dir / f"{src.stem}.pdf"
        export_word_to_pdf(apps.get_word(), src, dst)
        return True

    if ext in {".pptx"}:
        dst = out_dir / f"{src.stem}.pdf"
        export_ppt_to_pdf(apps.get_ppt(), src, dst)
        return True

    if ext in {".xlsx", ".xlsm", ".csv"}:
        # For CSV and Excel, we use Excel. CSV opens as a single worksheet.
        export_excel_sheet_pdfs(apps.get_excel(), src, out_dir)
        return True

    return False  # unsupported

In [50]:
def walk_and_convert(in_root: Path, out_root: Path):
    """Convert every supported file under ``in_root`` to PDF under ``out_root``.

    The folder structure below ``in_root`` is mirrored below ``out_root``.
    Files whose suffix is not in ``SUPPORTED_EXTS`` are ignored, as are Office
    owner/lock files (names starting with "~$"). A failure on one file is
    recorded and reported; it does not stop the run. The Office applications
    that were started are always closed at the end.

    Prints one status line per file and a summary; returns nothing.
    """
    # The Office apps are separate processes, so hand them absolute paths.
    in_root = Path(os.path.abspath(in_root))
    out_root = Path(os.path.abspath(out_root))

    if not in_root.is_dir():
        # os.walk() silently yields nothing for a missing folder; say so.
        print(f"FAIL -> input folder not found: {in_root}", file=sys.stderr)

    apps = OfficeApps()
    converted, failed = 0, []

    try:
        for root, dirs, files in os.walk(in_root):
            dirs.sort()  # deterministic traversal order
            rel_dir = Path(root).relative_to(in_root)
            dst_dir = out_root / rel_dir

            for name in sorted(files):
                # "~$name.docx" files are created next to documents that are
                # open in Office; they are not documents themselves.
                if name.startswith("~$"):
                    continue
                src = Path(root) / name
                if src.suffix.lower() not in SUPPORTED_EXTS:
                    continue

                try:
                    ok = convert_one(apps, src, dst_dir)
                    if ok:
                        converted += 1
                        print(f"OK   -> {src}")
                    else:
                        print(f"SKIP -> {src}")
                except Exception as e:
                    # Keep going: one bad file must not abort the whole batch.
                    failed.append((str(src), str(e)))
                    print(f"FAIL -> {src} : {e}", file=sys.stderr)
    finally:
        apps.close()

    print(f"\nConverted files: {converted}")
    if failed:
        print("Failures:")
        for f, err in failed:
            print(f" - {f}\n   {err}")

if __name__ == "__main__":
    # Source and destination folders are fixed for this run.
    in_dir = Path(r"C:\Users\Ashvini\Downloads\InputFiles").resolve()
    out_dir = Path(r"C:\Users\Ashvini\Downloads\OutputPDFs").resolve()

    # Create the destination root up front (no-op when it already exists).
    os.makedirs(out_dir, exist_ok=True)

    # Mirror the input tree as PDFs under the destination root.
    walk_and_convert(in_dir, out_dir)



Converted files: 0
Failures:
 - C:\Users\Ashvini\Downloads\InputFiles\Node_Exporter_Metrics.csv
   (-2147352567, 'Exception occurred.', (0, None, None, None, 0, -2146827284), None)


FAIL -> C:\Users\Ashvini\Downloads\InputFiles\Node_Exporter_Metrics.csv : (-2147352567, 'Exception occurred.', (0, None, None, None, 0, -2146827284), None)
