In [1]:
import re
import sys
import json
import numpy as np
from operator import itemgetter
from itertools import chain
#############################################
# Settings
#############################################

In [2]:
# Path of the notebook to convert; opened and parsed as JSON by the loading cell
nb_infile = "book/content/chapter3.ipynb"
# Path the converted notebook is written to by the saving cell
nb_outfile = "book/content/chapter3_new.ipynb"
# NOTE(review): not referenced by the equation-conversion cells visible here — confirm whether it is still needed
bib_filename = None

In [3]:
# Load Jupyter Notebook as Dictionary
try:
    # Notebooks are UTF-8 encoded JSON; the `with` block closes the file on exit,
    # so no explicit close() is needed
    with open(nb_infile, 'r', encoding='utf-8') as fp:
        nb = json.load(fp)
except OSError:
    # Missing or unreadable file: keep the hint, then re-raise so the following
    # cells do not fail later with a confusing NameError on `nb`
    print("Couldn't find Jupyter Notebook. Check your input path")
    raise
else:
    # Invalid JSON is no longer reported as a wrong path: json.JSONDecodeError
    # propagates with its own message
    print("Input Jupyter Notebook read successfully")

Input Jupyter Notebook read successfully


In [4]:
def split_cell_content(cell_content, latex_markers_positions):
    """Split the lines of a cell into alternating text and equation chunks.

    Parameters
    ----------
    cell_content : list
        Lines of the cell (the notebook ``source`` list).
    latex_markers_positions : list of int
        Indices into ``cell_content`` of the lines holding an equation
        environment marker, in increasing order. They are consumed in pairs:
        (begin line, end line), (begin line, end line), ...

    Returns
    -------
    cell_content_split : list of list
        Chunks in document order. Chunks may be empty (for instance when the
        cell starts or ends with an equation).
    cell_content_type : list of str
        ``'txt'`` or ``'eq'`` for the chunk at the same position.

    Notes
    -----
    Concatenating the chunks gives back every line of ``cell_content``. The
    text following the last equation runs to the real end of the cell; a slice
    ending at ``-1`` would silently drop the last line.
    A trailing marker without a partner is left inside the final text chunk
    instead of producing an empty equation chunk.
    """
    cell_content_split = []
    cell_content_type = []

    begin_positions = latex_markers_positions[0::2]
    end_positions = latex_markers_positions[1::2]

    # Index of the first line not yet assigned to a chunk
    cursor = 0
    for begin_idx, end_idx in zip(begin_positions, end_positions):
        # Text between the previous equation (or the cell start) and this one
        cell_content_split.append(cell_content[cursor:begin_idx])
        cell_content_type.append('txt')

        # The equation chunk includes both of its marker lines
        cell_content_split.append(cell_content[begin_idx:end_idx + 1])
        cell_content_type.append('eq')

        cursor = end_idx + 1

    # Whatever follows the last complete equation, up to and including the last line
    cell_content_split.append(cell_content[cursor:])
    cell_content_type.append('txt')

    return cell_content_split, cell_content_type

In [5]:
def insert_math_env(cell_content_split, cell_content_type):
    """Wrap every equation chunk in a MyST ``{math}`` directive.

    Parameters
    ----------
    cell_content_split : list of list of str
        Chunks of cell lines, as returned by ``split_cell_content``.
    cell_content_type : list of str
        Type of each chunk; chunks marked ``"eq"`` are wrapped, all others are
        copied through unchanged.

    Returns
    -------
    list of str
        The flattened lines of the cell, with the directive header (including
        a ``label:`` option) and the closing fence added around each equation.

    Notes
    -----
    The label is taken from the LaTeX label command found in the chunk, using
    the module-level ``latex_eqlabel_pattern``. If several lines carry a label
    the last one wins; if none does, the label is left empty. For a label of
    the form ``type:name`` only ``name`` is kept. Any other label is used as
    is and printed, so that labels without a type prefix can be reviewed.
    """
    cell_content_new = []
    for content_chunk, content_type in zip(cell_content_split, cell_content_type):
        if content_type != "eq":
            cell_content_new.append(content_chunk)
            continue

        # Define label name as empty character by default
        label_name = ""
        for line in content_chunk:
            # Find all LaTeX equation labels
            labels_found = re.findall(latex_eqlabel_pattern, line)
            if not labels_found:
                continue
            raw_label = labels_found[0].strip()
            label_parts = raw_label.split(":")
            if len(label_parts) == 2:
                # Label name accompanied by a LaTeX label marker ("eq:name")
                label_name = label_parts[1]
            else:
                # No marker, or more than one colon: keep the label untouched
                label_name = raw_label
                print(label_name)

        # Wrap equation block up, MyST syntax
        upper = ["```{math}\n", "---\n", "label: " + label_name + "\n", "---\n"]
        lower = ["```\n"]
        cell_content_new.append(upper + list(content_chunk) + lower)

    return list(chain.from_iterable(cell_content_new))

In [6]:
# NOTE(review): the two MyST lists below are not referenced by the cells visible
# here (insert_math_env writes the same markers as literals) — confirm before removing
markdown_env_equation = ["```{math}", "```"] # MyST Markdown Equation environment markers
markdown_eqlabel = ["---", "label: ", "---"] # MyST Markdown equation label format

# Captures the text between the braces of a LaTeX label command (non-greedy)
latex_eqlabel_pattern = re.compile(r'\\label\{(.+?)\}') # LaTeX equation label pattern
# The marker strings are regex sources passed to re.findall: the doubled backslash
# in the raw string matches one literal backslash in the notebook text
latex_env_equation = [r"\\begin{equation}", r"\\end{equation}"] #LaTeX equation environment markers
latex_env_eqnarray = [r"\\begin{eqnarray}", r"\\end{eqnarray}"] #LaTeX eqnarray environment markers

In [7]:
# Regex sources marking the first or the last line of a LaTeX equation block
PATTERNS = latex_env_equation + latex_env_eqnarray

for cell_idx, cell in enumerate(nb['cells']):
    cell_content = cell['source']

    # One entry per (line, pattern) hit, in line order: a line matching two
    # patterns is therefore recorded twice
    latex_markers_positions = [
        line_idx
        for line_idx, line in enumerate(cell_content)
        for pattern in PATTERNS
        if re.findall(pattern, line)
    ]

    # Cells without any equation marker are left untouched
    if not latex_markers_positions:
        continue

    cell_content_split, cell_content_type = split_cell_content(cell_content, latex_markers_positions)
    nb['cells'][cell_idx]['source'] = insert_math_env(cell_content_split, cell_content_type)

Xt
Ito
deterministic_system
noisy_saddle
noisy_saddle_solutions
noisy_saddle_RDS
invariance
invariance_x
invariance_y
stationary_orbit
general_noisy_saddle_solutions
x_increments
y_increments
higher_order_x
higher_order_y


In [8]:
# Write the converted notebook as JSON; leaving the `with` block closes the file
with open(nb_outfile, 'w') as out_file:
    json.dump(nb, out_file)

# Citation Syntax Conversion

In [None]:
%%writefile scripts/citations_latex2myst.py
def replace_citation_syntax(citations_matched_raw, item):
    """Replace native LaTeX syntax for inline citations in a cell item (paragraph) with MyST Markdown.

    Parameters
    ----------
    citations_matched_raw : list of str
        Raw text found between the braces of each LaTeX cite command in
        ``item``: one or more comma-separated BibTeX keys, possibly padded
        with blanks.
    item : str
        The line of the cell the matches were found in.

    Returns
    -------
    str
        ``item`` with each matched cite command replaced by the MyST cite
        role; the keys are stripped of surrounding blanks and joined with
        commas.
    """
    item_modified = item
    for match in citations_matched_raw:
        # Plain str.split keeps this function independent of the `re` module
        bib_tags_clean = [tag.strip() for tag in match.split(",")]

        # Replace LaTeX syntax with new one in cell items
        chunk_new = "{cite}`" + ",".join(bib_tags_clean) + "`"
        # Raw string: "\c" is not a valid escape sequence in a normal literal
        # and triggers a warning on recent Python versions.
        # The untouched match is used so that the text found in `item` is hit exactly.
        chunk_original = r'\cite{' + match + '}'
        item_modified = item_modified.replace(chunk_original, chunk_new)
    return item_modified

def replace_bibliography_syntax(bibliography_matched_raw, item):
    """Replace the LaTeX bibliography command in a cell item with a MyST directive.

    Only the first entry of ``bibliography_matched_raw`` is used, since a
    single bibliography line is expected. That entry is the raw text found
    between the braces of the command. The directive is written with the
    entry stripped of surrounding blanks, followed by a closing fence line.
    Returns the modified copy of ``item``.
    """
    raw_name = bibliography_matched_raw[0]
    latex_command = r'\bibliography{' + raw_name + '}'
    myst_directive = "```{bibliography} " + raw_name.strip() + "\n" + "```\n"
    return item.replace(latex_command, myst_directive)

def latex_to_new_syntax_bib(nb):
    """Convert LaTeX citation and bibliography commands in every cell of a notebook.

    Parameters
    ----------
    nb : dict
        Notebook loaded from JSON. Each entry of ``nb['cells']`` must provide
        a ``'source'`` list of lines.

    Returns
    -------
    dict
        The same ``nb`` object; its source lines are modified in place.

    Notes
    -----
    A line containing cite commands only has those converted; a bibliography
    command is looked for only on lines without any cite command.
    """
    # Function-scope import and patterns: at module level `re` and the two
    # pattern names are created only inside the script entry point, so relying
    # on them here raises NameError whenever this function is called otherwise
    import re
    cite_regex = re.compile(r'\\cite\{(.+?)\}')
    bib_regex = re.compile(r'\\bibliography\{(.+?)\}')

    for cell in nb['cells']:
        source_lines = cell['source']
        for line_idx, item in enumerate(source_lines):
            citations_matched_raw = cite_regex.findall(item)
            if citations_matched_raw:
                source_lines[line_idx] = replace_citation_syntax(citations_matched_raw, item)
                continue

            bibliography_matched_raw = bib_regex.findall(item)
            if bibliography_matched_raw:
                source_lines[line_idx] = replace_bibliography_syntax(bibliography_matched_raw, item)
    return nb

if __name__ == "__main__":
    # Command-line entry point: read a notebook, convert its citation syntax and
    # write the result to a new notebook file.
    import re
    import sys
    import json

    # Script inputs
    if len(sys.argv) < 3:
        # A string argument is printed to stderr and gives exit status 1
        sys.exit("Usage: citations_latex2myst.py <input.ipynb> <output.ipynb> [<references.bib>]")
    nb_infile = sys.argv[1]  # Input NoteBook (.ipynb)
    nb_outfile = sys.argv[2] # Output Notebook (.ipynb)
    # BibTeX file name (.bib). It is not used by the conversion below, so it is
    # accepted when given but no longer required.
    bib_filename = sys.argv[3] if len(sys.argv) > 3 else None

    # Matching patterns of native LaTeX syntax for replacement
    citations_pattern = re.compile(r'\\cite\{(.+?)\}')
    bibliography_pattern = re.compile(r'\\bibliography\{(.+?)\}')

    # Load Jupyter Notebook via JSON (the `with` block closes the file)
    try:
        with open(nb_infile, 'r', encoding='utf-8') as fp:
            nb = json.load(fp)
    except OSError:
        error_mssg = "Couldn't find Jupyter Notebook. Check your input path."
        print(error_mssg)
        # Non-zero status so that calling scripts can detect the failure
        sys.exit(1)
    except ValueError as err:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses:
        # the file exists but is not a readable JSON notebook
        error_mssg = "Couldn't parse Jupyter Notebook as JSON: " + str(err)
        print(error_mssg)
        sys.exit(1)

    # Modify syntax in all cells of input NoteBook
    nb_modified = latex_to_new_syntax_bib(nb)

    # Save modified Notebook
    with open(nb_outfile, 'w', encoding='utf-8') as fp:
        json.dump(nb_modified, fp)
    output_mssg = "New Notebook successfully generated"
    print(output_mssg)