# Docx functions

In [1]:
# !pip install python-docx
from docx import Document
from docx.shared import Inches, Cm
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.enum.text import WD_COLOR_INDEX
from docx.text.paragraph import Paragraph
from docx.oxml.xmlchemy import OxmlElement
import shutil

In [6]:
def create_file_based_template(template_path, document_path):
    """Create a new document file by copying the template file.

    Args:
        template_path: Path of the existing template file.
        document_path: Destination path for the copy.
    """
    shutil.copy(src=template_path, dst=document_path)

def append_paragraph_in_document(document_path, text):
    """Open the document, add *text* as a new paragraph and save in place.

    Args:
        document_path: Path of the .docx file to read and overwrite.
        text: Content of the paragraph to add.
    """
    doc = Document(document_path)
    doc.add_paragraph(text)
    doc.save(document_path)
    
def append_header_in_document(document_path, level, header_text):
    """Open the document, add a heading and save in place.

    Args:
        document_path: Path of the .docx file to read and overwrite.
        level: Heading level forwarded to ``add_heading``.
        header_text: Text of the heading.
    """
    doc = Document(document_path)
    doc.add_heading(header_text, level=level)
    doc.save(document_path)
    
def append_fig_in_document(document_path, figure_path, figure_label, width=None):
    """Add a centered picture followed by a centered "Figure. <label>" caption.

    Args:
        document_path: Path of the .docx file to read and overwrite.
        figure_path: Path of the image file to insert.
        figure_label: Caption text placed after the bold "Figure. " prefix.
        width: Optional picture width (for example ``Inches(5.9)``). The
            default ``None`` passes no explicit width, as before.
    """
    document = Document(document_path)

    document.add_picture(figure_path, width=width)
    # The picture sits in the most recently added paragraph; center that one.
    document.paragraphs[-1].alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

    caption = document.add_paragraph()
    caption.add_run('Figure. ').bold = True
    caption.add_run(figure_label)
    # Align through the caption object itself instead of re-listing paragraphs.
    caption.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

    document.save(document_path)
    
def append_dataframe_in_document(document_path, dataframe):
    """Add *dataframe* to the document as a table and save in place.

    The frame's index becomes the first table column. When that column comes
    from an unnamed index it is titled "#" and, if numeric, shifted by one so
    the numbering starts at 1 (same convention as
    ``search_n_replace_4dataframe``). The caller's frame is not modified.

    Args:
        document_path: Path of the .docx file to read and overwrite.
        dataframe: pandas DataFrame to render; every value is written with
            ``str()``.
    """
    document = Document(document_path)

    # reset_index() returns a new frame, so the caller's object stays intact.
    table_data = dataframe.reset_index().rename(columns={"index": "#"})
    # Previously the *new* index was incremented, which never reached the
    # table; shift the visible "#" column instead. Non-numeric or named
    # indexes are written unchanged.
    if table_data.columns[0] == "#" and table_data.iloc[:, 0].dtype.kind in "iuf":
        table_data.iloc[:, 0] = table_data.iloc[:, 0] + 1

    n_rows, n_cols = table_data.shape
    table = document.add_table(n_rows + 1, n_cols)

    # Header row; str() so non-string column labels are accepted.
    for j, column_name in enumerate(table_data.columns):
        table.cell(0, j).text = str(column_name)

    # Body rows; materialise the value array once instead of once per cell.
    values = table_data.values
    for i in range(n_rows):
        for j in range(n_cols):
            table.cell(i + 1, j).text = str(values[i, j])

    # NOTE: no table style or column width is applied here.
    document.save(document_path)

def search_n_replace_4word(document_path, search_text, replace_text):
    """Replace *search_text* inside the runs of the document's paragraphs.

    Replacement is done run by run, so an occurrence is only replaced when
    it lies entirely within one run; the run's formatting is kept.

    Args:
        document_path: Path of the .docx file to read and overwrite.
        search_text: Text to look for.
        replace_text: Replacement; converted with ``str()``.
    """
    document = Document(document_path)
    for paragraph in document.paragraphs:
        if search_text not in paragraph.text:
            continue
        # Work on runs (stretches of text sharing one style).
        for run in paragraph.runs:
            if search_text in run.text:
                run.text = run.text.replace(search_text, str(replace_text))
    document.save(document_path)
    
def search_n_replace_4paragraph(document_path, search_text, replace_text, keep_anchor=False):
    """Replace the whole text of every paragraph containing *search_text*.

    Args:
        document_path: Path of the .docx file to read and overwrite.
        search_text: Anchor text identifying the paragraphs to replace.
        replace_text: New paragraph text; converted with ``str()`` like in
            ``search_n_replace_4word``.
        keep_anchor: When true, a new paragraph holding *search_text* is
            placed right after each replaced paragraph so the anchor can be
            reused.
    """
    document = Document(document_path)
    new_text = str(replace_text)
    for paragraph in document.paragraphs:
        if search_text not in paragraph.text:
            continue
        paragraph.text = new_text
        if keep_anchor:
            # add_paragraph() creates the anchor at the end of the document;
            # addnext() then moves it directly behind the replaced paragraph.
            anchor = document.add_paragraph(search_text)
            paragraph._p.addnext(anchor._p)
    document.save(document_path)
    
def search_n_replace_4figure(document_path, search_text, figure_path, keep_anchor=False, width=None):
    """Replace every paragraph containing *search_text* with a centered picture.

    Args:
        document_path: Path of the .docx file to read and overwrite.
        search_text: Anchor text identifying the paragraphs to replace.
        figure_path: Path of the image file to insert.
        keep_anchor: When true, a new paragraph holding *search_text* is
            placed right after each picture so the anchor can be reused.
        width: Picture width; ``None`` (default) keeps the previous fixed
            width of ``Inches(5.11)``.
    """
    if width is None:
        width = Inches(5.11)

    document = Document(document_path)
    for paragraph in document.paragraphs:
        if search_text not in paragraph.text:
            continue
        print(' - FOUND & REPLACED:',search_text,'\n')
        paragraph.text = ''
        paragraph.add_run().add_picture(figure_path, width=width)
        # Center the paragraph that received the picture. The previous code
        # centered document.paragraphs[-1], i.e. the last paragraph of the
        # whole document, which is generally a different paragraph.
        paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

        if keep_anchor:
            # Created at the end of the document, then moved behind the picture.
            anchor = document.add_paragraph(search_text)
            paragraph._p.addnext(anchor._p)
    document.save(document_path)
    
def search_n_replace_4header(document_path, search_text, replace_text, level, keep_anchor=False):
    """Insert a heading after every paragraph containing *search_text*.

    The matching paragraph itself is emptied (not removed) and the heading
    is placed directly after it.

    Args:
        document_path: Path of the .docx file to read and overwrite.
        search_text: Anchor text identifying the paragraphs to replace.
        replace_text: Heading text; converted with ``str()`` like in
            ``search_n_replace_4word``.
        level: Heading level forwarded to ``add_heading``.
        keep_anchor: When true, a new paragraph holding *search_text* is
            placed right after each heading so the anchor can be reused.
    """
    document = Document(document_path)
    heading_text = str(replace_text)
    for paragraph in document.paragraphs:
        if search_text not in paragraph.text:
            continue
        paragraph.text = ''

        # add_heading() creates the heading at the end of the document;
        # addnext() moves its XML element right behind the emptied paragraph.
        heading = document.add_heading(heading_text, level=level)._p
        paragraph._p.addnext(heading)

        if keep_anchor:
            heading.addnext(document.add_paragraph(search_text)._p)
    document.save(document_path)
    
    
def search_n_replace_4dataframe(document_path, search_text, dataframe, keep_anchor=False):
    """Replace the first paragraph containing *search_text* with a table.

    The frame's index becomes the first table column. When that column comes
    from an unnamed index it is titled "#" and, if numeric, shifted by one so
    the numbering starts at 1. Named, multi-level or non-numeric indexes are
    written unchanged instead of raising. The caller's frame is not modified.

    Args:
        document_path: Path of the .docx file to read and overwrite.
        search_text: Anchor text identifying the paragraph to replace.
        dataframe: pandas DataFrame to render; every value is written with
            ``str()``.
        keep_anchor: When true, a new paragraph holding *search_text* is
            placed right after the table so the anchor can be reused.
    """
    document = Document(document_path)

    # reset_index() returns a new frame, so the caller's object stays intact.
    table_data = dataframe.reset_index().rename(columns={"index": "#"})
    if table_data.columns[0] == "#" and table_data.iloc[:, 0].dtype.kind in "iuf":
        table_data.iloc[:, 0] = table_data.iloc[:, 0] + 1

    n_rows, n_cols = table_data.shape
    # Materialise the value array once instead of once per cell.
    values = table_data.values

    for paragraph in document.paragraphs:
        if search_text not in paragraph.text:
            continue
        print(' - FOUND & REPLACED:',search_text,'\n')
        paragraph.text = ""

        table = document.add_table(n_rows + 1, n_cols)
        # Header row; str() so non-string column labels are accepted.
        for j, column_name in enumerate(table_data.columns):
            table.cell(0, j).text = str(column_name)
        # Body rows.
        for i in range(n_rows):
            for j in range(n_cols):
                table.cell(i + 1, j).text = str(values[i, j])

        # Narrow first column.
        for cell in table.columns[0].cells:
            cell.width = Cm(0.79)

        # add_table() creates the table at the end of the document; move its
        # XML element right behind the emptied anchor paragraph.
        tbl = table._tbl
        paragraph._p.addnext(tbl)

        if keep_anchor:
            tbl.addnext(document.add_paragraph(search_text)._p)
        # Only the first matching paragraph is replaced.
        break

    document.save(document_path)

def search_n_replace_4bulletlist(document_path, search_text, bulletlist):
    """Replace every paragraph containing *search_text* with a list of items.

    One 'List Paragraph'-styled paragraph is created per item, in order, at
    the position of the anchor paragraph; the anchor paragraph is removed.

    Args:
        document_path: Path of the .docx file to read and overwrite.
        search_text: Anchor text identifying the paragraphs to replace.
        bulletlist: Iterable of items; each is written with ``str()``.
    """
    document = Document(document_path)
    for anchor in document.paragraphs:
        if search_text not in anchor.text:
            continue

        # Chain the new paragraphs: each one is inserted after the previous,
        # starting right after the anchor, so the item order is preserved.
        insert_after = anchor
        for item in bulletlist:
            new_p = OxmlElement("w:p")
            insert_after._p.addnext(new_p)
            new_para = Paragraph(new_p, insert_after._parent)
            new_para.style = document.styles['List Paragraph']
            new_para.add_run(str(item))
            insert_after = new_para

        # Drop the anchor paragraph from the document tree. (The former
        # attribute reset on the detached XML element had no effect.)
        anchor_element = anchor._element
        anchor_element.getparent().remove(anchor_element)

    document.save(document_path)
    
def pagebreak(document_path):
    """Open the document, add a page break and save in place.

    Args:
        document_path: Path of the .docx file to read and overwrite.
    """
    doc = Document(document_path)
    doc.add_page_break()
    doc.save(document_path)
    
def remove_style_elements(document_path):
    """Remove the style placeholder paragraphs and tables from the document.

    A paragraph is removed when its text contains 'STYLE_BULLETLIST'; a table
    is removed when the first paragraph of its top-left cell contains
    'STYLE_TABLE'. The document is saved in place.

    Args:
        document_path: Path of the .docx file to read and overwrite.
    """
    doc = Document(document_path)

    for para in doc.paragraphs:
        if 'STYLE_BULLETLIST' not in para.text:
            continue
        para_element = para._element
        para_element.getparent().remove(para_element)

    for tbl in doc.tables:
        first_cell_text = tbl.cell(0, 0).paragraphs[0].text
        if 'STYLE_TABLE' not in first_cell_text:
            continue
        tbl_element = tbl._element
        tbl_element.getparent().remove(tbl_element)

    doc.save(document_path)

# Using the functions

In [18]:
# Path of the example document that the cells below create and modify.
document_path='docx_examples.docx'

#### Creating an empty document

In [19]:
# Build a new Document (no source file given) and write it to document_path.
document = Document()
document.save(document_path)

#### Adding a header to the empty document

In [20]:
# Add a level-0 heading to the in-memory document from the previous cell,
# then save again so the file on disk contains it.
document.add_heading('Python docx examples', 0)
document.save(document_path)

In [21]:
# Example: add a paragraph through the helper, which reopens the file from
# disk and saves it back.
text = 'Hame'
append_paragraph_in_document (document_path, text)

In [22]:
# Example: add a level-2 heading through the helper, which reopens the file
# from disk and saves it back.
level = 2
header_text = 'Hame'
append_header_in_document (document_path, level, header_text)