# An attempt at writing a script that can convert every specified file to LeanPub Flavoured Markdown for publication on LeanPub

In [None]:
from lib import read_mkdocs

In [None]:
from pyprojroot import here

In [None]:
# Load the MkDocs configuration through the project-local `read_mkdocs` helper.
mkdocs_config = read_mkdocs()
# The "nav" entry is the navigation tree that a later cell flattens into
# (title, filename) pairs.
nav = mkdocs_config["nav"]
# Source documents are looked up under the "docs" folder of the path
# returned by pyprojroot's `here()`.
docroot = here() / "docs"


In [None]:
from lib import parse_navigation

# The goal here is to flatten the tree structure into a list of 2-tuples,
# where the title is the first element and the filename is the second element.
# NOTE(review): the empty list passed as second argument is presumably the
# accumulator `parse_navigation` collects into -- confirm in lib.
title_files = parse_navigation(nav, [])
# The preface is put at the very front of the list by hand.
title_files.insert(0, ('Preface', 'preface/preface.md'))
# Bare expression: shows the resulting list when run as a notebook cell.
title_files

In [None]:
from lib import exclude

# Chapter titles handed to `exclude` below.
exclusion = [
    "Welcome", 
    "Get Setup",
    "Prerequisites",
    "Further Learning",
    "Style Guide",
]

# NOTE(review): `exclude` is a project-local helper; presumably it drops every
# (title, filename) pair whose title is listed above -- confirm in lib.
title_files = exclude(title_files, titles=exclusion)


In [None]:
# Bare expression: shows `title_files` after the exclusion step when run as a
# notebook cell.
title_files

We now need to convert each of the files into Markua.

In [None]:
# Sample inputs for trying out `replace_dataframe_head_with_markdown` by hand
# (see the commented-out call at the end of this cell).
strings = ["df.head()", "another_thing\ndf2.head()"]


def replace_dataframe_head_with_markdown(s: str):
    """
    Wrap standalone ``<expr>.head()`` lines so that they print a Markdown table.

    Every line of ``s`` that ends with ``.head()`` and is, on its own, a
    complete Python expression is rewritten to
    ``print(<expr>.head().to_markdown())``. Leading indentation is kept in
    front of the ``print`` call so that lines inside indented blocks stay
    valid. Lines that end with ``.head()`` but are not a standalone
    expression (assignments, comments, the tail of a multi-line statement)
    are left untouched, because wrapping them would produce broken code.

    :param s: Source text, with lines separated by ``"\\n"``.
    :returns: The rewritten text. Every line, including the last one, is
        terminated with ``"\\n"``.
    """
    import ast

    rewritten = []
    for line in s.split("\n"):
        if line.endswith(".head()"):
            code = line.lstrip()
            indent = line[: len(line) - len(code)]
            try:
                # Only a line that parses as a single expression can be
                # passed to print() safely.
                ast.parse(code, mode="eval")
            except (SyntaxError, ValueError):
                # Deliberately keep the line as-is: it is not a bare
                # expression, so wrapping it would yield invalid code.
                pass
            else:
                line = f"{indent}print({code}.to_markdown())"
        rewritten.append(line + "\n")
    return "".join(rewritten)

# replace_dataframe_head_with_markdown(strings[0])

In [None]:
def replace_markdown_table_tabs(body: str):
    """
    Remove the four-space indent that precedes a ``|`` character.

    Every occurrence of four spaces directly followed by ``|`` is reduced to
    a bare ``|``; all other text is returned unchanged.

    :param body: Markdown text to clean up.
    :returns: The text with each ``"    |"`` replaced by ``"|"``.
    """
    indented_pipe = "    |"
    # Splitting on the indented pipe and re-joining with a bare pipe swaps
    # each occurrence, scanning left to right.
    fragments = body.split(indented_pipe)
    return "|".join(fragments)

In [None]:
from nbconvert.exporters import MarkdownExporter
from nbformat.notebooknode import NotebookNode
from nbconvert.preprocessors import ExecutePreprocessor
from lib import strip_execution_count

def nb2markdown(nb: NotebookNode, kernel: str):
    """
    Execute a notebook and export it to Markdown.

    Before execution, the source of every code cell is rewritten so that
    tabular results are emitted as Markdown tables (``.describe()``, one
    specific ``correlation_centrality`` call and standalone ``.head()``
    lines) and one specific HTML5 video animation call is commented out.
    Non-code cells are left untouched: they are not executed, and rewriting
    them would alter prose that merely mentions these calls.

    :param nb: The notebook to rewrite, execute and export. Its cells are
        modified in place.
    :param kernel: Name of the Jupyter kernel used to execute the notebook.
    :returns: The ``(body, resources)`` pair produced by
        ``MarkdownExporter.from_notebook_node``.
    """
    for cell in nb["cells"]:
        if cell.get("cell_type") != "code":
            continue
        src = (
            cell["source"]
            .replace(".describe()", ".describe().to_markdown()")
            .replace("correlation_centrality(graphs[0])", "correlation_centrality(graphs[0]).to_markdown()")
            .replace("HTML(anim(G2, msg, n_frames=4).to_html5_video())", "# HTML(anim(G2, msg, n_frames=4).to_html5_video())")
        )
        # Convert standalone `.head()` lines to `print(....head().to_markdown())`.
        cell["source"] = replace_dataframe_head_with_markdown(src)

    # Run the rewritten notebook; the return value is not used, the executed
    # `nb` object itself is exported below.
    ep = ExecutePreprocessor(timeout=600, kernel_name=kernel)
    ep.preprocess(nb)

    strip_execution_count(nb)
    markdown_exporter = MarkdownExporter()
    body, resources = markdown_exporter.from_notebook_node(nb)
    return body, resources


In [None]:
from lib import read_notebook

In [None]:
# Chapter titles whose generated Markdown gets the `{sample: true}` marker
# prepended in the conversion loop further down.
sample_chapters = ["Preface", "Learning Goals", "Introduction to Graphs", "The NetworkX API"]

In [None]:
# Now, convert everything into plain text markdown.

In [None]:
from pathlib import Path
from pyprojroot import here

# Output root: the "manuscript" folder under the path returned by `here()`.
# Created up front (including parents) if it does not exist yet.
build_dir = here() / "manuscript"
build_dir.mkdir(parents=True, exist_ok=True)

# Image outputs of converted notebooks are written into manuscript/images.
images_dir = build_dir / "images"
images_dir.mkdir(parents=True, exist_ok=True)

In [None]:
def nth_repl_all(string: str, substring: str, replacement: str, nth: int) -> str:
    """
    Replace every ``nth`` occurrence of ``substring`` with ``replacement``.

    Occurrences are counted left to right over non-overlapping matches in
    the original ``string``; the ``nth``, ``2*nth``, ``3*nth``, ... matches
    are replaced and all others are kept. Text inserted as a replacement is
    never searched again, so the result is well defined even when
    ``replacement`` contains ``substring`` or has a different length.

    :param string: Text to search.
    :param substring: Text to look for. If empty, ``string`` is returned
        unchanged.
    :param replacement: Text substituted for each selected match.
    :param nth: Replace every ``nth`` match (``1`` replaces all of them).
        Values below ``1`` select nothing.
    :returns: The text with the selected matches replaced.
    """
    if not substring:
        # An empty needle matches everywhere; there is no meaningful
        # "nth occurrence", so leave the text alone.
        return string

    step = len(substring)
    pieces = []
    cursor = 0
    seen = 0
    while True:
        found = string.find(substring, cursor)
        if found == -1:
            break
        seen += 1
        if seen == nth:
            pieces.append(string[cursor:found])
            pieces.append(replacement)
            seen = 0
        else:
            pieces.append(string[cursor:found + step])
        # Resume right after this match so adjacent matches are not skipped.
        cursor = found + step
    pieces.append(string[cursor:])
    return "".join(pieces)



In [None]:
def mdlatex2lfmlatex(text):
    """
    Rewrite ``$`` / ``$$`` math delimiters as ``{$$}`` ... ``{/$$}`` pairs.

    The conversion runs as a fixed sequence of ``nth_repl_all`` passes:
    ``$$`` and then ``$`` are first turned into the placeholder ``{@@}``,
    the placeholder becomes ``{$$}``, and finally every second ``{$$}`` is
    turned into the closing ``{/$$}``.

    :param text: Markdown text containing ``$``-delimited LaTeX.
    :returns: The text after all four passes.
    """
    # (substring, replacement, nth) for each pass; the order is significant
    # because later passes operate on the output of earlier ones.
    passes = (
        ("$$", "{@@}", 1),
        ("$", "{@@}", 1),
        ("{@@}", "{$$}", 1),
        ("{$$}", "{/$$}", 2),
    )
    for needle, substitute, every in passes:
        text = nth_repl_all(text, substring=needle, replacement=substitute, nth=every)
    return text

In [None]:
import logging


# Lower the root logger's threshold so the INFO progress messages below are
# not filtered out.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Accumulates the contents of Book.txt: one relative `<chapter>.md/index.md`
# path per line, in processing order.
book_txt = ""

for chapter, fpath_str in title_files:
    logging.info("Processing chapter %s", chapter)
    fpath = Path(fpath_str)
    source_path = docroot / fpath
    # Relative path of the chapter with its suffix swapped for ".md"; used
    # for the image prefix and the Book.txt entry.
    md_relpath = fpath.with_suffix(".md")

    # Handle notebooks
    if source_path.suffix == ".ipynb":
        text, resources = nb2markdown(read_notebook(source_path), kernel="nams")
    # Handle markdown files
    else:
        # The source is only read, so open it read-only, with an explicit
        # encoding rather than the platform default.
        with open(source_path, "r", encoding="utf-8") as f:
            text = f.read()
        # Plain markdown has no extracted outputs; mirror the notebook case
        # so the resource loop below works unchanged.
        resources = {"outputs": {}}

    text = f"# {chapter}\n\n" + text

    if chapter in sample_chapters:
        insert = "{sample: true}\n\n"
        text = insert + text

    # More processing: Replace all output_* with <relative_dir>_md_<autogen_numbers>
    # `as_posix()` guarantees "/" separators regardless of platform, so the
    # prefix is the same everywhere.
    img_prefix = md_relpath.as_posix().replace("/", "_").replace(".", "_") + "_"
    text = text.replace("output_", "images/" + img_prefix)

    # More processing: Leanpub Flavoured Markdown uses {$$} to delineate LaTeX.
    text = mdlatex2lfmlatex(text)

    # More preprocessing: Clean up tabs for all of the markdown tables
    text = replace_markdown_table_tabs(text)

    # Each chapter becomes a directory named `<chapter>.md` that holds an
    # `index.md` file.
    markdown_dir = (build_dir / fpath).with_suffix(".md")
    markdown_dir.mkdir(parents=True, exist_ok=True)

    # Write the text out
    with open(markdown_dir / "index.md", "w", encoding="utf-8") as f:
        f.write(text)

    # Write the resources out, renamed the same way as the references in
    # the text above.
    for k, v in resources["outputs"].items():
        k = k.replace("output_", img_prefix)
        logging.debug("image filename = %s", k)
        with open(images_dir / k, "wb") as f:
            f.write(v)

    book_txt = book_txt + (md_relpath / "index.md").as_posix() + "\n"


In [None]:
# Persist the accumulated chapter listing as Book.txt in the build directory,
# replacing any previous contents.
book_index_path = build_dir / "Book.txt"
book_index_path.write_text(book_txt)