# Python LaTeX to PreTeXt Parser

## Import Libraries

In [1]:
import latex_to_pretext_parsing_tools as ltp
import re # regular expressions
from os import walk # directory listings
from tkinter import Tk     # from tkinter import Tk for Python 3.x
from tkinter.filedialog import askopenfilename

## Load LaTeX document

### Run Script

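Directions:

* Set the LaTeX chapter location (`tex_root`; chapters are read from its `chapters/` folder, and `main_tex_name` is the main `.tex` file)
* Set the PreTeXt file location (`xml_root`; section files are written to its `sections/` folder)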

In [2]:
# Hide the Tk root window: at most a file dialog is wanted, never a full GUI.
Tk().withdraw()
# filename = askopenfilename()  # optional "Open" dialog that returns the selected path

# --- User values (from directions) ---
tex_root = 'C:/Users/gcox0/Google Drive/1--MATLAB_textbook/MA110 MA310 SP21/'
main_tex_name = 'MA110_310_Text.tex'
xml_root = 'C:/Users/gcox0/Google Drive/1--MATLAB_textbook/4--master_copy/matlab-ebook/'
book_title = 'MATLAB BOOK'

# --- Locations derived from the user values ---
tex_chapter_root = f'{tex_root}chapters/'
pretext_section_root = f'{xml_root}sections/'

# Only the first (top-level) entry produced by walk() is used; when walk()
# yields nothing the default tuple supplies an empty file list.
top_level_listing = next(walk(tex_chapter_root), (None, None, []))
filenames = top_level_listing[2]

# Part title for every chapter, listed in chapter order (entry i belongs to
# the i-th chapter file). Runs of equal titles are written out by repetition.
parts = (
    ['MATLAB Fundamentals'] * 2
    + ['Flow Control']
    + ['part3'] * 4
    + ['Applications'] * 7
)

# get chapter titles and ids
# Every line of the main TeX file shaped like
#     \chapter{<title>} ... \label{chapter:<id>}
# contributes one title and one id, in document order.
chapter_titles = []
chapter_ids = []
with open(tex_root + main_tex_name, "r") as main_tex_file:
    main_tex_lines = main_tex_file.readlines()

chapter_pattern = re.compile(r'\\chapter\{(.*?)\}.*\\label\{chapter\:(.*?)\}')
for line in main_tex_lines:
    chapter_match = chapter_pattern.search(line)
    if chapter_match is None:
        # A line can mention \chapter without being a labelled chapter heading
        # (e.g. \chapter*{...} or \chaptermark). Report it and move on instead
        # of failing on a missing match object.
        if re.search(r'\\chapter', line) is not None:
            print('skipping \\chapter line without a chapter label: ' + line.strip())
        continue
    title = chapter_match.group(1)
    # Normalise the product name's capitalisation in the title.
    chapter_titles.append(title.replace('Matlab', 'MATLAB'))
    chapter_ids.append(chapter_match.group(2))
        
# get chapter content
# Chapter sources are the files whose names start with two digits followed by
# "_Matlab". They are sorted so that position i lines up with chapter_titles[i],
# chapter_ids[i] and parts[i]; the directory listing itself comes back in no
# guaranteed order.
chapter_files = sorted(
    file for file in filenames if re.search(r'^\d\d_Matlab', file) is not None
)

# prepare xml main
# Builds the main PreTeXt file: one <part> per run of equal entries in `parts`,
# one <chapter> per chapter file, and one <xi:include> per section file.
# Each section file is also rewritten in place after running the ltp passes.
xml_id = re.sub(r'\s','-',book_title.lower()) + '-main'
main_xml_file_path = xml_root + xml_id + '.xml'
main_xml_all_file_path = xml_root + xml_id + '_all.xml'
xml_main_lines = ltp.add_header(xml_id, book_title)

# Chapters are matched to parts, titles and ids purely by position, so check
# up front that every chapter file has all three. This gives a clear error
# before anything is written instead of an IndexError part-way through.
chapter_count = len(chapter_files)
if min(len(parts), len(chapter_titles), len(chapter_ids)) < chapter_count:
    raise ValueError(
        'found ' + str(chapter_count) + ' chapter files but only '
        + str(len(parts)) + ' parts, '
        + str(len(chapter_titles)) + ' chapter titles and '
        + str(len(chapter_ids)) + ' chapter ids'
    )

# Passes applied to the lines of each section file, in this exact order.
section_passes = (
    ltp.move_section_intro,
    ltp.parse_general_top_priority,
    ltp.parse_subsections,
    ltp.parse_examples,
    ltp.parse_verbs,
    ltp.parse_display_math,
    ltp.parse_general_low_priority,
    ltp.remove_double_blank_lines,
    ltp.parse_lists,
    ltp.parse_tables,
    ltp.parse_paragraphs,
    ltp.fix_p_tags,
    ltp.fix_indentations,
)

tab_depth = 1
part = ''
part_open = False
# scan through each chapter
for chpt_num, chapter_name in enumerate(chapter_files):

    # Check if we need to add a new part
    if not part_open or parts[chpt_num] != part:
        if part_open:
            tab_depth -= 1
            xml_main_lines.append('\t'*tab_depth + '</part>\n')
            xml_main_lines.append('\n')

        part = parts[chpt_num]
        xml_main_lines.append('\t'*tab_depth + '<part>\n')
        xml_main_lines.append('\t'*tab_depth + '<title>' + part + '</title>\n')
        xml_main_lines.append('\n')
        tab_depth += 1
        part_open = True

    # Load and read each chapter
    chapter_path = tex_chapter_root + chapter_name

    xml_main_lines.append('\t'*tab_depth + '<chapter xml:id="' + chapter_ids[chpt_num] + '">\n')
    xml_main_lines.append('\t'*tab_depth + '<title>' + chapter_titles[chpt_num] + '</title>\n')
    xml_main_lines.append('\n')

    # Includes sit one level below <chapter>. The indent is computed rather
    # than tracked by mutating tab_depth, so a failure inside the try block
    # cannot leave the depth off by one for the following chapters.
    include_indent = '\t' * (tab_depth + 1)

    # create separate section files
    try:
        with open(chapter_path, "r") as chapter_latex_file:
            chapter_latex_lines = chapter_latex_file.readlines()
        # Each returned item is used below as the path of a section file.
        # NOTE(review): presumably split_sections also creates those files
        # under pretext_section_root - confirm in latex_to_pretext_parsing_tools.
        chpt_sections = ltp.split_sections(chapter_latex_lines, chpt_num+1, pretext_section_root)

        # parse each section and reference it from the main file
        for section_path in chpt_sections:
            section_file = section_path.split('/')[-1]
            xml_main_lines.append(include_indent + '<xi:include href="./sections/' + section_file + '" />\n')

            with open(section_path, "r") as section_latex_file:
                section_latex_lines = section_latex_file.readlines()

            for section_pass in section_passes:
                section_latex_lines = section_pass(section_latex_lines)

            # Overwrite the section file with its converted content.
            with open(section_path, "w+") as section_pretext_file:
                section_pretext_file.writelines(section_latex_lines)

    except Exception as err:
        # Best effort: report the chapter and the cause, then continue with the
        # next one. KeyboardInterrupt/SystemExit are deliberately not caught.
        print('could not parse chapter: ' + chapter_name + ' (' + repr(err) + ')')

    xml_main_lines.append('\n')
    xml_main_lines.append('\t'*tab_depth + '</chapter>\n')
    xml_main_lines.append('\n')

# Close the last part only if one was actually opened (no chapters -> no part).
if part_open:
    tab_depth -= 1
    xml_main_lines.append('\t'*tab_depth + '</part>\n')
xml_main_lines = ltp.add_footer(xml_main_lines)

# The same content goes to both files. They are opened only now, so a failure
# earlier in the cell never leaves truncated output behind.
for output_path in (main_xml_file_path, main_xml_all_file_path):
    with open(output_path, "w") as output_file:
        output_file.writelines(xml_main_lines)

## Create Specific Sections

In [2]:
# 1-based chapter numbers to keep, and 1-based section numbers to keep within
# those chapters. Set all_chpts_and_sects to True to copy the file unchanged.
chapters = [3]
sections = [1,2,3,4,5,6]
all_chpts_and_sects = False

xml_main_o = '../matlab-book-main_all.xml'
xml_main_n = '../matlab-book-main.xml'


def comment_out_unselected(xml_lines, keep_chapters, keep_sections):
    """Wrap every unselected chapter/section of the main XML in a comment.

    Chapters are counted by their ``<chapter xml:id`` lines and sections by
    their ``<xi:include href="./sections/sec`` lines; section numbering
    restarts after each ``</chapter>``.

    * A chapter not in ``keep_chapters`` is commented out as a whole, from its
      opening line through its ``</chapter>`` line.
    * Inside a kept chapter, runs of sections not in ``keep_sections`` are
      commented out; a comment still open at ``</chapter>`` is closed just
      before that line so the chapter element itself stays intact.

    Args:
        xml_lines: lines of the full main XML file.
        keep_chapters: collection of chapter numbers (starting at 1) to keep.
        keep_sections: collection of section numbers (starting at 1) to keep.

    Returns:
        A new list of lines with ``<!--`` / ``-->`` lines inserted.
    """
    out_lines = []
    chpt_num = 0
    sect_num = 0
    in_comment = False

    for line in xml_lines:

        # found a chapter header
        if re.search(r'\<chapter xml\:id',line) is not None:
            chpt_num += 1
            if chpt_num not in keep_chapters:
                out_lines.append('<!--\n')
                in_comment = True

        # found a section
        if re.search(r'\<xi\:include href="./sections/sec',line) is not None:
            sect_num += 1
            # Only kept chapters need per-section handling; an unselected
            # chapter is already inside one comment.
            if chpt_num in keep_chapters:
                if sect_num in keep_sections:
                    if in_comment:
                        # wanted section follows a commented run: close it
                        out_lines.append('-->\n')
                        in_comment = False
                elif not in_comment:
                    # unwanted section starts a new commented run
                    out_lines.append('<!--\n')
                    in_comment = True

        # found a chapter footer
        if re.search(r'\</chapter\>',line) is not None:
            sect_num = 0
            if in_comment and chpt_num in keep_chapters:
                # kept chapter: end the comment before the closing tag
                out_lines.append('-->\n')
                out_lines.append(line)
            elif in_comment:
                # dropped chapter: the closing tag is part of the comment
                out_lines.append(line)
                out_lines.append('-->\n')
            else:
                out_lines.append(line)
            in_comment = False
        else:
            out_lines.append(line)

    return out_lines


# read xml main
with open(xml_main_o,"r") as xml_main_fid:
    xml_main_lines = xml_main_fid.readlines()

if all_chpts_and_sects:
    new_xml_main_lines = xml_main_lines
else:
    new_xml_main_lines = comment_out_unselected(xml_main_lines, chapters, sections)

# save xml main
with open(xml_main_n,"w+") as xml_main_fid:
    xml_main_fid.writelines(new_xml_main_lines)

FileNotFoundError: [Errno 2] No such file or directory: 'C:/Users/gcox0/Google Drive/1--MATLAB_textbook/4--master_copy/matlab-ebook/matlab-book-main_all.xml'

## Converting Sage Cell Button

In [None]:
import re # regular expressions
from os import walk # directory listings

html_root = 'C:/Users/gcox0/Google Drive/1--MATLAB_textbook/4--master_copy/matlab-ebook/'


def is_html_file(name):
    """Return True when ``name`` ends with the literal ``.html`` extension.

    A plain suffix test is used so that names such as ``notes_html`` or
    ``page.xhtml`` are not picked up by accident.
    """
    return name.endswith('.html')


def relabel_sage_buttons(lines):
    """Replace the ``Evaluate (Octave)`` button label with ``Run MATLAB``.

    Args:
        lines: the lines of one HTML file.

    Returns:
        ``(new_lines, changed)`` where ``new_lines`` is the relabelled copy and
        ``changed`` tells whether at least one line was modified.
    """
    new_lines = [line.replace('Evaluate (Octave)', 'Run MATLAB') for line in lines]
    return new_lines, new_lines != lines


# get files from html_root (top level only; [] if the directory yields nothing)
filenames = next(walk(html_root), (None, None, []))[2]

# keep only the HTML pages
html_files = [file for file in filenames if is_html_file(file)]

for html_file in html_files:
    html_path = html_root + html_file
    with open(html_path,"r") as html_fid:
        html_lines = html_fid.readlines()

    new_html_lines, write_file = relabel_sage_buttons(html_lines)

    # Rewrite a file only when it actually contained the old label.
    if write_file:
        print('Updating: ' + html_file)
        with open(html_path,"w+") as html_fid:
            html_fid.writelines(new_html_lines)
        