In [None]:
!pip install python-docx pylatex


In [9]:
from docx import Document
from pylatex import Document as LatexDocument, Section, Subsection, Command, Figure, Table, Tabular, NoEscape
import os

def read_word_file(file_path):
    """Open the Word document at ``file_path`` and return it.

    Args:
        file_path: Location of the ``.docx`` file; handed unchanged to
            ``Document``.

    Returns:
        The object produced by ``Document(file_path)``.
    """
    return Document(file_path)

def convert_paragraphs(doc, latex_doc):
    """Copy the text of every non-empty paragraph of ``doc`` into ``latex_doc``.

    Each paragraph whose ``text`` is truthy is appended to ``latex_doc``,
    immediately followed by a separate ``"\\n"`` item. Paragraphs with empty
    text are skipped entirely.

    Args:
        doc: Object exposing a ``paragraphs`` iterable whose items have a
            ``text`` attribute.
        latex_doc: Target container; only its ``append`` method is used.
    """
    for paragraph in doc.paragraphs:
        content = paragraph.text
        if not content:
            continue
        latex_doc.append(content)
        latex_doc.append("\n")

def convert_figures(doc, latex_doc):
    """Export the embedded pictures of ``doc`` and add a figure for each one.

    Every inline shape whose ``type`` equals 3 is handled as an embedded
    picture: its bytes are written to a file under ``images/`` and a
    ``Figure`` that includes that file (80% of the text width, with a
    placeholder caption) is created inside ``latex_doc``. Shapes of any other
    type are ignored.

    Args:
        doc: Word document; ``inline_shapes`` and ``part.related_parts`` are
            read from it.
        latex_doc: LaTeX container that receives the figures via ``create``.
    """
    image_dir = 'images'
    for shape in doc.inline_shapes:
        if shape.type != 3:  # 3 is the inline-shape type treated as a picture
            continue
        # The blip's ``embed`` attribute is the relationship id that keys the
        # image part in ``related_parts``.
        rel_id = shape._inline.graphic.graphicData.pic.blipFill.blip.embed
        image = doc.part.related_parts[rel_id]
        # Name the file after the part's own partname so that several
        # pictures do not overwrite one another.
        # NOTE(review): ``image.filename`` appears to be a generic
        # ``image.<ext>`` for parts loaded from a package -- confirm against
        # the python-docx version in use. It is kept as a fallback only.
        image_name = os.path.basename(str(image.partname)) or image.filename
        # The output directory is not guaranteed to exist on a fresh run.
        os.makedirs(image_dir, exist_ok=True)
        image_filename = os.path.join(image_dir, image_name)
        with open(image_filename, 'wb') as img_file:
            img_file.write(image.blob)
        with latex_doc.create(Figure(position='h!')) as pic:
            pic.add_image(image_filename, width=NoEscape(r'0.8\textwidth'))
            pic.add_caption("Caption for image")

def convert_tables(doc, latex_doc):
    """Render every table of ``doc`` as a bordered, centred LaTeX tabular.

    For each table a ``Table`` float containing a ``Tabular`` is created in
    ``latex_doc``. The tabular has one centred column per cell of the widest
    row, vertical rules between columns, and a horizontal rule before every
    row and after the last one. Rows with fewer cells are padded with empty
    strings. Tables without any rows or cells are skipped.

    Args:
        doc: Word document; its ``tables`` are read.
        latex_doc: LaTeX container that receives the tables via ``create``.
    """
    for table in doc.tables:
        # Read each row's cell texts once and reuse them for both the width
        # computation and the output.
        rows_text = [[cell.text for cell in row.cells] for row in table.rows]
        # ``default=0`` keeps a table with no rows from raising ValueError.
        max_cols = max((len(cells) for cells in rows_text), default=0)
        if max_cols == 0:
            continue  # nothing to render for an empty table
        column_format = "|".join(["c"] * max_cols)
        with latex_doc.create(Table(position='h!')) as tab:
            with tab.create(Tabular(f'|{column_format}|')) as data_table:
                for row_data in rows_text:
                    data_table.add_hline()
                    # Pad short rows so every row has max_cols entries.
                    row_data.extend([""] * (max_cols - len(row_data)))
                    data_table.add_row(row_data)
                data_table.add_hline()

def create_latex_document(doc):
    """Build a LaTeX document from the Word document ``doc``.

    A new ``LatexDocument`` is created and, inside a section titled
    'Document Content', the paragraph, figure and table converters are run
    in that order. Output is therefore grouped by kind (all paragraphs, then
    all figures, then all tables).

    Args:
        doc: Word document passed to each converter.

    Returns:
        The populated ``LatexDocument``.
    """
    latex_doc = LatexDocument()
    converters = (convert_paragraphs, convert_figures, convert_tables)
    with latex_doc.create(Section('Document Content')):
        for convert in converters:
            convert(doc, latex_doc)
    return latex_doc

def main(file_path='Busca de Arquivos Interessantes.docx', output_basename='output'):
    """Convert a Word document to PDF by way of LaTeX.

    Reads the document, builds the LaTeX document and calls ``generate_pdf``
    with ``clean_tex=False``.

    Args:
        file_path: Path of the ``.docx`` file to convert. Defaults to the
            file name originally hard-coded here.
        output_basename: Base name (without extension) passed to
            ``generate_pdf``. Defaults to ``'output'``.

    Raises:
        CompilerError: Raised by ``generate_pdf`` when no LaTeX compiler is
            found; installing latexmk or pdfLaTeX, or specifying a compiler,
            resolves it.
    """
    doc = read_word_file(file_path)
    latex_doc = create_latex_document(doc)
    latex_doc.generate_pdf(output_basename, clean_tex=False)

# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()


CompilerError: No LaTex compiler was found
Either specify a LaTex compiler or make sure you have latexmk or pdfLaTex installed.