# Word Documents (docx)

You can automatically generate or read Word documents. You will need to install `python-docx` using conda or pip (run these in a shell):
* `conda install python-docx`
* `pip install python-docx`
    * to install behind a firewall, you will need to specify a proxy with the `--proxy` option
    * or just download the wheel from here and install it with pip: https://www.lfd.uci.edu/~gohlke/pythonlibs/
        * `pip install python_docx-0.8.6-....whl`
        
Some notes:
* A document is divided into paragraphs and tables
* A paragraph is divided into "runs" of text

In [None]:
from docx import Document

# Build a small demo document exercising headings, runs, styled
# paragraphs and a table, then save it as 'demo.docx'.
document = Document()

# Level 0 produces the document title; levels 1-9 produce section headings.
document.add_heading('Document Title', 0)

# A paragraph is made of runs; each run carries its own character formatting.
p = document.add_paragraph('A plain paragraph having some ')
p.add_run('bold').bold = True
p.add_run(' and some ')
p.add_run('italic.').italic = True

document.add_heading('Heading, level 1', level=1)

# Styles are referenced by their UI name (with spaces). The legacy style ids
# ('IntenseQuote', 'ListBullet', 'ListNumber') are deprecated and emit a
# UserWarning on lookup.
document.add_paragraph('Intense quote', style='Intense Quote')
document.add_paragraph('first item in unordered list', style='List Bullet')
document.add_paragraph('first item in ordered list', style='List Number')

# Optional: embed a picture (uncomment once the image file exists).
# from docx.shared import Inches
# document.add_picture('monty-truth.png', width=Inches(1.25))

# Table with a header row followed by one row per record.
table = document.add_table(rows=1, cols=3)
hdr_cells = table.rows[0].cells
hdr_cells[0].text = 'Qty'
hdr_cells[1].text = 'Id'
hdr_cells[2].text = 'Desc'

# Each record is (id, description, quantity); the columns are Qty, Id, Desc.
records = [(1, 'play-doh', 5), (2, 'dough', 2)]
for item_id, desc, qty in records:
    row_cells = table.add_row().cells
    row_cells[0].text = str(qty)
    row_cells[1].text = str(item_id)
    row_cells[2].text = desc

document.add_page_break()

document.save('demo.docx')

In [None]:
# Re-open the saved file; Document() is handed an open binary stream here.
with open('demo.docx', 'rb') as docx_file:
    document = Document(docx_file)

In [None]:
# Print every word of every paragraph, flagging paragraphs that mention 'first'.
for paragraph in document.paragraphs:
    text = paragraph.text
    if 'first' in text:
        print('******First!****')
    # split() with no argument splits on any run of whitespace.
    for word in text.split():
        print(word)

In [None]:
# replace text
for p in document.paragraphs:
    if 'first' in p.text:
        for i in range(len(p.runs)):
            if 'first' in p.runs[i].text:
                p.runs[i].text = p.runs[i].text.replace('first', 'second')

In [None]:
# Report which paragraphs mention 'first' and which mention 'second';
# 'first' takes precedence when both occur.
for paragraph in document.paragraphs:
    text = paragraph.text
    if 'first' in text:
        print('******First!****')
        continue
    if 'second' in text:
        print('NOT FIRST!')

In [None]:
# detect bold or other features
# Scan every run explicitly instead of relying on loop variables left over
# from an earlier cell. `run.bold` is tri-state: True, False, or None
# (None means the setting is inherited from the style), so only runs that
# are explicitly bold are reported.
for paragraph in document.paragraphs:
    for run in paragraph.runs:
        if run.bold:
            print(repr(run.text), 'is bold')