# Functions that I use to automate Microsoft Word (.docx) document creation 

This jupyter notebook contains 4 functions:
1. ```list_docx_styles(template_path)```: To list predefined 'styles' in a .docx file: this is important because you can respect the layout of a template .docx file
2. ```add2report(input, type=None)```: to append Python values (text, list, set, DataFrame, or the path to a .png/.jpg/.jpeg image) to a .docx file
3. ```replace_placeholder_in_docx(file_path, placeholder, replacement, output_path=None)```: to replace placeholders/anchors predefined in the template .docx file (format {{PLACEHOLDER_EXAMPLE}})
4. ```update_docx_toc(file_path)```: to update the Table of Contents (BUT it only works if you are running on Windows 🤬, sorry!)

## 1. Function to **list** predefined 'styles' in a .docx file

In [122]:
from docx import Document
import os

def list_docx_styles(template_path):
    """Print the name and type of every style defined in a Word file.

    Useful for discovering which style names a template offers before
    referencing them from code.

    Args:
        template_path: Path to a ``.docx`` or ``.dotx`` file.

    Returns:
        None. The styles are printed, one per line.

    Raises:
        FileNotFoundError: If ``template_path`` does not exist.

    Any other failure while opening or reading the file is reported with a
    printed message instead of being raised.
    """
    if not os.path.exists(template_path):
        raise FileNotFoundError(f"File '{template_path}' not found.")

    try:
        if template_path.lower().endswith('.dotx'):
            import shutil
            import tempfile

            # Work on a uniquely named copy in the system temp directory so we
            # never clobber a file in the working directory, and always clean
            # it up, even when opening the copy fails.
            # NOTE(review): python-docx may still reject a renamed .dotx based
            # on the package content type -- verify with a real template.
            fd, temp_docx = tempfile.mkstemp(suffix='.docx')
            os.close(fd)
            try:
                shutil.copy(template_path, temp_docx)
                doc = Document(temp_docx)
            finally:
                os.remove(temp_docx)
        else:
            doc = Document(template_path)

        print(f"Styles in '{template_path}':\n")
        for style in doc.styles:
            print(f"- {style.name} ({style.type})")

    except Exception as e:
        print(f"Error reading styles: {e}")

Testing:

In [125]:
# template_file =  'Template_Secura.docx'
# list_docx_styles(template_file)

In [126]:
# doc = Document(template_file)
# paragraph_styles = [s.name for s in doc.styles if s.type == 1]  # 1 = PARAGRAPH
# print(paragraph_styles)

## 2. Function to **append** python variable content in a .docx file 

In [118]:
def add2report(input, type=None):
    """Append a Python value to the report document and save it.

    The function reads three names from the notebook's global scope at call
    time; they must be defined before calling it:

    * ``output_file``   -- path of the report; opened if it exists, otherwise
      created from ``template_file``.
    * ``template_file`` -- path of the ``.docx``/``.dotx`` template.
    * ``style_map``     -- dict mapping the logical types used here
      (``"header1"``, ``"header2"``, ``"paragraph"``, ``"list"``,
      ``"bullet"``, ``"numbered"``, ``"table"``) to style names of the
      template. ``"paragraph"`` is required for plain text.

    Args:
        input: What to append.
            * ``str`` path to an existing ``.png``/``.jpg``/``.jpeg`` file:
              inserted as a centered picture, 5 inches wide.
            * any other ``str``: a paragraph styled according to ``type``.
            * ``list``/``set``: one list paragraph per item when ``type`` is
              ``"list"``, ``"bullet"`` or ``"numbered"``; otherwise converted
              to a DataFrame and rendered as a table.
            * ``pandas.DataFrame``: a centered table with an index column.
        type: Logical style key (see ``style_map`` above). ``None`` means a
            normal paragraph for text.

    Returns:
        ``"✅"`` when the document was updated and saved, ``"❌"`` when the
        user declined a large table or when any error occurred (the error is
        printed, not raised).
    """
    try:
        import shutil
        import tempfile

        import pandas as pd
        from docx.enum.table import WD_TABLE_ALIGNMENT
        from docx.enum.text import WD_ALIGN_PARAGRAPH
        from docx.shared import Inches

        list_types = ('list', 'bullet', 'numbered')

        # Load or initialize document
        if os.path.exists(output_file):
            doc = Document(output_file)
        else:
            if not os.path.exists(template_file):
                raise FileNotFoundError(f"Template file '{template_file}' not found.")
            if template_file.lower().endswith('.dotx'):
                # Uniquely named temp copy, always removed even if loading fails.
                # NOTE(review): python-docx may still reject a renamed .dotx
                # based on the package content type -- verify.
                fd, temp_docx = tempfile.mkstemp(suffix='.docx')
                os.close(fd)
                try:
                    shutil.copy(template_file, temp_docx)
                    doc = Document(temp_docx)
                finally:
                    os.remove(temp_docx)
            else:
                doc = Document(template_file)

        # IMAGE file (a path that does not exist falls through to plain text)
        if isinstance(input, str) and os.path.exists(input) and input.lower().endswith(('.png', '.jpg', '.jpeg')):
            paragraph = doc.add_paragraph()
            run = paragraph.add_run()
            run.add_picture(input, width=Inches(5))
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

        # TEXT input with header/paragraph/bullet/numbered
        elif isinstance(input, str):
            if type in ('header1', 'header2', 'paragraph', None):
                para = doc.add_paragraph(input)
                para.style = style_map.get(type, style_map["paragraph"])
            elif type in list_types:
                para = doc.add_paragraph(input, style=style_map.get(type, "List Paragraph"))
                para.paragraph_format.left_indent = None
            else:
                # Previously an unknown type added nothing but still reported success.
                raise ValueError(
                    f"Unsupported type '{type}' for text input. "
                    "Use header1, header2, paragraph, list, bullet or numbered."
                )

        # LIST or SET input
        elif isinstance(input, (list, set)):
            items = list(input)
            if type in list_types:
                style_name = style_map.get(type, "List Paragraph")
                for item in items:
                    para = doc.add_paragraph(str(item), style=style_name)
                    para.paragraph_format.left_indent = None
            else:
                # No list style requested: render the items as a one-column table.
                return add2report(pd.DataFrame(items), type=type)

        # DATAFRAME input
        elif isinstance(input, pd.DataFrame):
            df = input.copy()
            # Show a 1-based row number for a default 0-based index; the length
            # guard keeps an empty frame from raising IndexError.
            if len(df) > 0 and df.index[0] == 0:
                df.index = df.index + 1

            if len(df) > 100:
                # Imported lazily so tkinter is only needed when the prompt is shown.
                from tkinter import Tk, messagebox

                root = Tk()
                root.withdraw()
                response = messagebox.askyesno("Large Table Warning", f"There are {len(df)} items to be added to the document. Are you sure you want to proceed?")
                root.destroy()
                if not response:
                    return "❌"

            table = doc.add_table(rows=1, cols=len(df.columns) + 1)
            table.style = style_map.get("table", "Table Grid")
            # Let python-docx place the alignment element instead of appending
            # it to the table properties by hand.
            table.alignment = WD_TABLE_ALIGNMENT.CENTER

            hdr_cells = table.rows[0].cells
            hdr_cells[0].text = ''  # empty header for "Index"
            for i, col in enumerate(df.columns):
                hdr_cells[i + 1].text = str(col)

            # itertuples keeps each column's dtype (iterrows upcasts mixed
            # numeric rows, turning 100 into 100.0); element 0 is the index.
            for record in df.itertuples(index=True, name=None):
                row_cells = table.add_row().cells
                for i, value in enumerate(record):
                    row_cells[i].text = str(value)

        else:
            raise TypeError("Unsupported input type. Must be str, list, set, DataFrame, or image.")

        doc.save(output_file)
        return "✅"

    except Exception as e:
        print(f"Error: {e}")
        return "❌"


Testing: 

In [127]:
# template_file =  'Template_Secura.docx'
# output_file = 'Report2.docx'

# # Replace with actual style name from your template
# style_map = {"header1": "Heading 1",
#              "header2": "Heading 2",
#              "paragraph": "Normal",
#              "table": "Secura style table 2023",
#              "list": "List Paragraph",
#             "numbered": "Numbered list"}

# #--------------------------------
# text_variable = "Text Example"
# add2report(text_variable, type="header1")
# add2report(text_variable, type="header2")
# add2report(text_variable, type="bullet")
# add2report(text_variable, type="numbered")
# add2report(text_variable)


# list_variable = ["José", "Jair", "Santanna"]
# add2report(list_variable)
# add2report(list_variable, type="list")
# add2report(list_variable, type="numbered")

# dataframe_variable = pd.DataFrame({"Name": ["José", "Jair", "Santanna"], "Score": [100, 99, 98], "Passed": [True, True, False]})
# add2report(dataframe_variable)
# #--------------------------------
# sns.barplot(data=pd.DataFrame({'value': [4, 5, 6], 'category': ['A', 'B', 'C']}), x='category', y='value')
# plt.tight_layout()             
# plt.savefig("bar.png")         
# plt.close()                   
# add2report("bar.png")

## 3. Function to **replace** Placeholders/Anchors (format {{PLACEHOLDER_EXAMPLE}}) for text

In [59]:
import re
from docx import Document
import os

def _iter_container_paragraphs(container):
    """Yield every paragraph of ``container``, descending into its table cells."""
    yield from container.paragraphs
    for table in container.tables:
        for row in table.rows:
            for cell in row.cells:
                yield from _iter_container_paragraphs(cell)


def _replace_token_in_paragraph(paragraph, token, replacement):
    """Replace every occurrence of ``token`` in the paragraph's runs.

    The token may be split across several runs. The replacement is written
    into the run where the token starts; runs that are not part of a match
    are left untouched. Returns True when at least one replacement was made.
    """
    runs = paragraph.runs
    texts = [run.text for run in runs]
    combined = ''.join(texts)
    if token not in combined:
        return False

    # Character span [start, end) of each run inside the combined text.
    spans = []
    offset = 0
    for text in texts:
        spans.append((offset, offset + len(text)))
        offset += len(text)

    # Non-overlapping occurrences, left to right, located in the ORIGINAL text
    # so a replacement that itself contains the token is not re-expanded.
    hits = []
    at = combined.find(token)
    while at != -1:
        hits.append(at)
        at = combined.find(token, at + len(token))

    updated = list(texts)
    # Work right to left: edits never shift the offsets of hits still to do.
    for start in reversed(hits):
        end = start + len(token)
        first = next(i for i, (s, e) in enumerate(spans) if s <= start < e)
        last = next(i for i, (s, e) in enumerate(spans) if s < end <= e)
        head = updated[first][:start - spans[first][0]]
        tail = updated[last][end - spans[last][0]:]
        if first == last:
            updated[first] = head + replacement + tail
        else:
            updated[first] = head + replacement
            for middle in range(first + 1, last):
                updated[middle] = ''
            updated[last] = tail

    # Only touch runs whose text actually changed.
    for run, old, new in zip(runs, texts, updated):
        if new != old:
            run.text = new
    return True


def replace_placeholder_in_docx(file_path, placeholder, replacement, output_path=None):
    """Replace a ``{{PLACEHOLDER}}`` with text throughout a Word document.

    Every occurrence is replaced in body paragraphs, in each section's header
    and footer, and in paragraphs inside tables (including nested tables) of
    those parts.

    Args:
        file_path: Path of the ``.docx`` file to read.
        placeholder: Placeholder name, with or without the surrounding braces
            (``"{{USERNAME}}"`` and ``"USERNAME"`` are equivalent).
        replacement: Value to insert; converted with ``str()``.
        output_path: Where to save the result. Defaults to ``file_path``
            (in-place update).

    Returns:
        ``"✅"`` if at least one replacement was made and the document was
        saved, ``"❌"`` if the placeholder was not found (nothing is saved).

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    target_key = placeholder.strip("{}")      # Normalize input: {{USERNAME}} -> USERNAME

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File '{file_path}' not found.")

    doc = Document(file_path)
    token = "{{" + target_key + "}}"
    new_text = str(replacement)

    containers = [doc]
    for section in doc.sections:
        containers.append(section.header)
        containers.append(section.footer)

    made_replacement = False
    # Merged table cells and headers shared between sections can expose the
    # same paragraph more than once; process each underlying paragraph once.
    seen = set()
    if target_key:  # an empty placeholder name never matches anything
        for container in containers:
            for paragraph in _iter_container_paragraphs(container):
                key = getattr(paragraph, "_p", paragraph)
                if key in seen:
                    continue
                seen.add(key)
                if _replace_token_in_paragraph(paragraph, token, new_text):
                    made_replacement = True

    # Save updated doc
    if made_replacement:
        save_path = output_path if output_path else file_path
        doc.save(save_path)
        return "✅"
    else:
        return "❌"


Testing:

In [66]:
# placeholder = "{{REPORT_TITLE}}"
# replacement = "Leaked Data Analysis"
# replace_placeholder_in_docx(output_file, placeholder, replacement)

# placeholder = "{{AUTHOR_NAME}}"
# replacement = "Jair Santanna"
# replace_placeholder_in_docx(output_file, placeholder, replacement)

# placeholder = "{{TLP_LABEL}}"
# replacement = "TLP:Clear"
# replace_placeholder_in_docx(output_file, placeholder, replacement)

# # {{MANAGEMENT_SUMMARY}}

## 4. Function to update the **Table of Contents** (ONLY FOR WINDOWS 🤬)

In [72]:
# Although I developed this on a Windows PC, I usually run the notebook on a Mac or on Linux!

In [73]:
# !pip install pywin32
# import win32com.client as win32
import os

def update_docx_toc(file_path):
    """Refresh the fields and tables of contents of a Word document in place.

    Drives Microsoft Word through COM, so it needs Windows, an installed
    Word, and the ``pywin32`` package.

    Args:
        file_path: Path of the ``.docx`` file to update.

    Returns:
        ``"✅"`` on success, or ``"❌ Failed: <error>"`` if opening, updating,
        saving or closing the document raised.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ImportError: If ``pywin32`` is not installed.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File '{file_path}' not found.")

    # Imported here because the module only exists on Windows; the rest of the
    # notebook must stay usable on other platforms.
    try:
        import win32com.client as win32
    except ImportError as exc:
        raise ImportError(
            "update_docx_toc requires the 'pywin32' package and Microsoft Word (Windows only)."
        ) from exc

    word = win32.gencache.EnsureDispatch('Word.Application')
    word.Visible = False

    try:
        # Word resolves relative paths against its own working directory, not
        # the notebook's, so always hand it an absolute path.
        doc = word.Documents.Open(os.path.abspath(file_path))
        try:
            # Field refresh is exposed through the Fields collection and the
            # TableOfContents objects of the document.
            doc.Fields.Update()
            for toc in doc.TablesOfContents:
                toc.Update()
            doc.Save()
        finally:
            # Always release the document; after a successful Save there is
            # nothing left to discard, after a failure we must not be prompted.
            doc.Close(False)
        return "✅"
    except Exception as e:
        return f"❌ Failed: {e}"
    finally:
        word.Quit()

Testing: 

In [74]:
# update_docx_toc(file_path)