In [1]:
#default_exp mdxexp

# Docusaurus preprocessors for markdown export

> Custom preprocessors that help convert notebook content into MDX

In [2]:
# export
from fastcore.basics import *
from pathlib import Path
from nbdev2.docexp import preprocess_cell,_run_preprocessor,DocExporter,nb2md
from nbdev2.convert import export_docs
from html.parser import HTMLParser

In [3]:
#hide
# Set `__file__` by hand so that `Path(__file__)` (used for `MdxExporter.tpl_path` below) can be evaluated while running inside the notebook.
__file__ = 'nbdev2_docusaurus/mdxexp.py'
# Notebook used as the fixture by the test cells in this notebook.
_test_file = 'tests/docs_test.ipynb'

This module defines [nbconvert custom preprocessors](https://nbconvert.readthedocs.io/en/latest/nbconvert_library.html#Custom-Preprocessors) that help transform notebook content into MDX, a variant of Markdown.

## Escaping HTML

In [4]:
#export
class _HTMLdf(HTMLParser):
    "HTML parser that records whether a `<style scoped>` element mentions `.dataframe`."
    df = False      # becomes True once '.dataframe' is seen as data while `scoped` is set
    scoped = False  # True between a `<style scoped ...>` start tag and the next `</style>`

    def handle_starttag(self, tag, attrs):
        "Set `scoped` on a `style` start tag that carries a `scoped` attribute."
        if tag != 'style':
            return
        attr_names = {name for name, _ in attrs}
        if 'scoped' in attr_names:
            self.scoped = True

    def handle_data(self, data):
        "Set `df` when data seen while `scoped` is set contains '.dataframe'."
        if self.scoped and '.dataframe' in data:
            self.df = True

    def handle_endtag(self, tag):
        "Clear `scoped` on any `style` end tag."
        if tag == 'style':
            self.scoped = False

    @classmethod
    def search(cls, x):
        "Feed the HTML string `x` to a fresh parser and return its `df` flag."
        finder = cls()
        finder.feed(x)
        return finder.df

In [5]:
#export
@preprocess_cell
def HTMLEscape(cell):
    "Wrap each HTML output in an `html` code fence and mark the cell so it can be rendered inside a <HTMLOutputBlock> component."
    if cell.cell_type != 'code': return
    for out in cell.outputs:
        # Outputs with no (or an empty) `text/html` payload are left untouched.
        if not nested_idx(out, 'data', 'text/html'): continue
        cell.metadata.html_output = True
        markup = out['data']['text/html']
        # Centering is requested unless the markup is a dataframe render; with several HTML outputs the last one decides.
        cell.metadata.html_center = not _HTMLdf.search(markup)
        out['data']['text/html'] = f'```html\n{markup.strip()}\n```'

By default, HTML is incompatible with MDX. We place HTML in a code block and wrap it with a custom component so that the static site generator can render it.

In [6]:
# Run `HTMLEscape` on its own over the test notebook.
# NOTE(review): `c` is presumably the rendered markdown text — confirm against `_run_preprocessor`.
c, _ = _run_preprocessor([HTMLEscape], _test_file)
# The `<div>` HTML output is expected to sit inside an ```html fence.
assert '```html\n<div>' in c and '</div>\n```' in c

## Custom DocExporter

In [7]:
#export
class MdxExporter(DocExporter):
    "An MDX exporter: a `DocExporter` pointed at `tpl/ob.md.j2` with `HTMLEscape` appended to `pps`"
    # Resolved path of the `tpl` directory that sits next to this module's file.
    tpl_path = Path(__file__).parent.joinpath('tpl').resolve()
    tpl_file = 'ob.md.j2'

    @property
    def pps(self):
        "The parent's `pps` followed by `HTMLEscape`."
        inherited = super().pps
        return inherited + [HTMLEscape]

In [8]:
# Test using nb2md

# Expected output location: same path as the test notebook, with a `.md` suffix.
_dest_file = Path(_test_file).with_suffix('.md')
# Remove any stale output so the existence check below really exercises this conversion.
_dest_file.unlink(missing_ok=True)
nb2md(_test_file, exp_cls=MdxExporter)
assert _dest_file.exists(), f'{_dest_file} does not exist.'
c = _dest_file.read_text()
# The component's opening and closing tags must be present, and the text `center` must not appear anywhere.
assert '<HTMLOutputBlock' in c and '</HTMLOutputBlock>' in c and 'center' not in c
# Clean up the generated file.
_dest_file.unlink(missing_ok=True)

In [10]:
#Test using export_docs

# Start from a clean slate, as the nb2md test does: a file left behind by an earlier
# (possibly failed) run would otherwise satisfy the existence check on its own.
_dest_file.unlink(missing_ok=True)
export_docs(_test_file, dest='tests')
assert _dest_file.exists(), f'{_dest_file} does not exist.'
c = _dest_file.read_text()
# The component's opening and closing tags must be present, and the text `center` must not appear anywhere.
assert '<HTMLOutputBlock' in c and '</HTMLOutputBlock>' in c and 'center' not in c
# Clean up the generated file.
_dest_file.unlink(missing_ok=True)

converting: tests/docs_test.ipynb


## Export -

In [39]:
#skip
# NOTE(review): presumably writes the `#export` cells of the project's notebooks to their modules — confirm against nbprocess.
# NOTE(review): imported from `nbprocess`, while the rest of this notebook imports from `nbdev2` — confirm this is intended.
from nbprocess.export import nbs_export
nbs_export()