In [None]:
# export
from fastai_local.core import *
from fastai_local.test import *
from fastai_local.imports import *
from fastai_local.notebook.export import *
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor, Preprocessor
from nbconvert import HTMLExporter
from nbformat.sign import NotebookNotary
from traitlets.config import Config

In [None]:
# default_exp notebook.export2html

# Converting notebooks to html

> The functions that transform the dev notebooks into the documentation of the library

## Preprocessing notebook

### Cell processors

In [None]:
#export
def remove_widget_state(cell):
    "Remove widgets in the output of `cell`"
    widget_mime = 'application/vnd.jupyter.widget-view+json'
    # Only code cells carry outputs; any other cell is returned untouched.
    if cell['cell_type'] == 'code' and 'outputs' in cell:
        # Key lookup (instead of `out.data`) so that plain-dict outputs work as well as
        # objects supporting attribute access.
        cell['outputs'] = [out for out in cell['outputs']
                           if not ('data' in out and widget_mime in out['data'])]
    return cell

In [None]:
#export
def hide_cells(cell):
    "Hide the input of `cell` when it calls `show_doc` or starts with an `# export` flag"
    # `\s*` (the original had a bare `s*`): same matches, but it no longer reads as a literal "s".
    if check_re(cell, r'\s*show_doc\(|^\s*#\s*(export)\s+'):
        # Build a new dict rather than overwriting: existing metadata is preserved, and a
        # shallow copy of the cell never shares a mutated metadata dict with its original.
        cell['metadata'] = {**(cell.get('metadata') or {}), 'hide_input': True}
    return cell

In [None]:
# Both a `show_doc` call and a cell flagged `# export` must get their input hidden.
for source in ['show_doc(read_nb)', '# export\nfrom fastai_local.core import *']:
    # Use the loop variable: a hard-coded source would only ever test the first case.
    cell = {'cell_type': 'code', 'source': source}
    cell1 = hide_cells(cell.copy())
    assert 'metadata' in cell1
    assert 'hide_input' in cell1['metadata']
    assert cell1['metadata']['hide_input']

# A cell flagged `# exports` keeps its input visible, so it must come back unchanged.
cell = {'cell_type': 'code', 'source': '# exports\nfrom fastai_local.core import *'}
test_eq(cell, hide_cells(cell.copy()))

In [None]:
#export
def remove_hidden(cells):
    "Remove in `cells` the non-markdown cells that start with a `#hide` or `#default_exp` flag"
    hidden_flag = re.compile(r'^\s*#\s*(hide|default_exp)\s+')
    def _keep(c):
        # Markdown is always kept; other cells are kept unless they open with a hiding flag.
        return c['cell_type'] == 'markdown' or hidden_flag.search(c['source']) is None
    return [c for c in cells if _keep(c)]

In [None]:
sources = [
    '# export\nfrom fastai_local.core import *',
    '# hide\nfrom fastai_local.core import *',
    '#exports\nsuper code',
    '#default_exp notebook.export',
    'show_doc(read_nb)']
cells = [{'cell_type': 'code', 'source': src} for src in sources]
cells.append({'cell_type': 'markdown', 'source': 'nice'})

cells1 = remove_hidden(cells)
# The `# hide` and `#default_exp` cells are dropped; the others keep their relative order.
test_eq(len(cells1), 4)
for kept,orig_idx in zip(cells1, [0, 2, 4, 5]): test_eq(kept, cells[orig_idx])

In [None]:
#export
def _show_doc_cell(name):
    return {'cell_type': 'code',
            'execution_count': None,
            'metadata': {},
            'outputs': [],
            'source': f"show_doc({name})"}

def add_show_docs(cells):
    "Insert, after each export cell of `cells`, one `show_doc` cell per name it exports"
    export_flag = r'^\s*#\s*exports?\s*'
    out = []
    for c in cells:
        out.append(c)
        # Only cells carrying an export flag get `show_doc` cells appended after them.
        if not check_re(c, export_flag): continue
        exported = export_names(c['source'], func_only=True)
        out.extend([_show_doc_cell(name) for name in exported])
    return out

In [None]:
# Locate the cell defining `read_nb` in the export notebook and take it with the cell just before it.
tst_nb = read_nb('91_export.ipynb')
for i,cell in enumerate(tst_nb['cells']):
    # NOTE(review): if no cell matches, `i` silently ends up as the last index — confirm the
    # notebook still contains a cell starting exactly with this text.
    if cell['source'].startswith('#export\ndef read_nb'): break
tst_cells = [c.copy() for c in tst_nb['cells'][i-1:i+1]]
added_cells = add_show_docs(tst_cells)
# Expect the two original cells followed by one generated `show_doc(read_nb)` cell.
test_eq(len(added_cells), 3)
test_eq(added_cells[0], tst_nb['cells'][i-1])
test_eq(added_cells[1], tst_nb['cells'][i])
test_eq(added_cells[2], _show_doc_cell('read_nb'))

In [None]:
# export
def remove_fake_headers(cells):
    "Remove in `cells` the non-code cells whose source ends with a header finishing by a dash"
    fake_header = re.compile(r'#+.*-$')
    # Code cells are never touched, even if a comment in them looks like a fake header.
    return [c for c in cells
            if c['cell_type'] == 'code' or fake_header.search(c['source']) is None]

In [None]:
# Put a fake header in front of real cells: it must be the only cell that gets removed.
fake_header_cell = {'cell_type': 'markdown', 'metadata': {}, 'source': '### Fake-'}
cells = [fake_header_cell] + tst_nb['cells'][:10]
cells1 = remove_fake_headers(cells)
test_eq(len(cells1), len(cells)-1)
test_eq(cells1[0], cells[1])

In [None]:
# export
def remove_empty(cells):
    "Drop from `cells` every cell whose source has length zero"
    kept = []
    for c in cells:
        if len(c['source']) == 0: continue
        kept.append(c)
    return kept

### Grabbing metadata

In [None]:
# Quick reminder: `list.pop(i)` removes the item at index `i` from the list in place.
x = [1,2,3]
x.pop(1)
x

[1, 3]

In [None]:
# export
def get_metadata(cells):
    "Find the cell with title and summary in `cells`, pop it from `cells` and return its metadata."
    # Raw string: in a normal string literal `\s` is an invalid escape sequence (it warns on
    # recent Python versions). The pattern reads `# title`, optional newlines, `> summary`.
    pat = re.compile(r'^\s*#\s*([^\n]*)\n*>\s*([^\n]*)')
    meta = {'keywords': 'fastai',
            'summary' : 'summary',
            'title'   : 'Title'}
    for i,cell in enumerate(cells):
        if cell['cell_type'] != 'markdown': continue
        match = pat.match(cell['source'])
        if match:
            title,summary = match.groups()
            # `cells` is modified in place; popping while iterating is safe because we stop here.
            cells.pop(i)
            meta['summary'],meta['title'] = summary,title
            break
    # Defaults are returned untouched when no markdown cell has the title/summary shape.
    return meta

In [None]:
# Read the notebook again and check the title/summary extracted from it.
tst_nb = read_nb('91_export.ipynb')
expected_meta = {
    'keywords': 'fastai',
    'summary': 'The functions that transform the dev notebooks in the fastai library',
    'title': 'Converting notebooks to modules'}
test_eq(get_metadata(tst_nb['cells']), expected_meta)
# The cell with the metadata is popped out, so if we do it a second time we get the default.
default_meta = {'keywords': 'fastai',
                'summary' : 'summary',
                'title'   : 'Title'}
test_eq(get_metadata(tst_nb['cells']), default_meta)

### Executing show_doc cells

In [None]:
# export
class ExecuteShowDocPreprocessor(ExecutePreprocessor):
    "An `ExecutePreprocessor` that only executes `show_doc` cells and cells starting with an export flag"
    def preprocess_cell(self, cell, resources, index):
        # Anything that is not a code cell with a source is handed back unexecuted.
        if 'source' not in cell or cell.cell_type != "code": return cell, resources
        runnable = re.compile(r"show_doc\(([\w\.]*)|^\s*#\s*exports?\s*")
        if runnable.search(cell['source']) is None: return cell, resources
        # Matching cells go through the regular execution of the parent class.
        return super().preprocess_cell(cell, resources, index)

In [None]:
# export
def execute_nb(nb, metadata=None, show_doc_only=True):
    "Run `nb` (only the cells picked by `ExecuteShowDocPreprocessor` if `show_doc_only`) and return the executed notebook"
    if show_doc_only: ep_cls = ExecuteShowDocPreprocessor
    else:             ep_cls = ExecutePreprocessor
    processor = ep_cls(timeout=600, kernel_name='python3')
    # `metadata` is passed as the resources of the preprocessor; an empty dict stands in for nothing.
    resources = metadata if metadata else {}
    node = nbformat.from_dict(nb)
    processor.preprocess(node, resources)
    return node

In [None]:
# Build a small notebook: everything from `tst_nb` but its cells, then its first cell and `added_cells`.
fake_nb = {key: val for key,val in tst_nb.items() if key != 'cells'}
first_cell = tst_nb['cells'][0].copy()
fake_nb['cells'] = [first_cell] + added_cells
fake_nb = execute_nb(fake_nb)
# The last cell is the generated `show_doc` one: executing it must have produced some output.
last_outputs = fake_nb['cells'][-1]['outputs']
assert len(last_outputs) > 0

## Conversion

In [None]:
#hide
#Tricking jupyter notebook to have a __file__ attribute. All _file_ will be replaced by __file__
#In the notebook it points at a file inside `fastai_local/notebook`, so its parent is that folder.
_file_ = Path('fastai_local').absolute()/'notebook'/'export.py'

In [None]:
# export
def _exporter():
    "Create an `HTMLExporter` without input/output prompts that uses the `jekyll.tpl` template"
    exporter = HTMLExporter(Config())
    exporter.exclude_input_prompt=True
    exporter.exclude_output_prompt=True
    exporter.template_file = 'jekyll.tpl'
    # Add the folder containing this file to the places where templates are searched.
    exporter.template_path.append(str(Path(_file_).parent))
    return exporter

In [None]:
# export
# Functions taking and returning the whole list of cells (combined with `compose` in `convert_nb`).
process_cells = [remove_fake_headers, add_show_docs, remove_hidden, remove_empty]
# Functions taking and returning a single cell (combined with `compose` and applied to each cell).
process_cell  = [hide_cells, remove_widget_state]

In [None]:
# export
def convert_nb(fname, dest_path='docs'):
    "Convert a notebook `fname` to html file in `dest_path`."
    fname = Path(fname).absolute()
    nb = read_nb(fname)
    nb['cells'] = compose(*process_cells)(nb['cells'])
    nb['cells'] = [compose(*process_cell)(c) for c in nb['cells']]
    # Drop what comes before the first underscore and join the rest with dots,
    # e.g. `04_data_core.ipynb` -> `data.core.html`.
    dest_name = '.'.join(fname.with_suffix('.html').name.split('_')[1:])
    # `get_metadata` pops the title cell out of `nb['cells']`, so it runs before execution.
    meta_jekyll = get_metadata(nb['cells'])
    meta_jekyll['nb_path'] = f'{fname.parent.name}/{fname.name}'
    nb = execute_nb(nb)
    # Render before opening the destination: if the export fails, no empty (and freshly
    # modified) html file is left behind to make the notebook look up to date.
    html = _exporter().from_notebook_node(nb, resources=meta_jekyll)[0]
    dest_dir = Path(dest_path)
    dest_dir.mkdir(parents=True, exist_ok=True)
    # Explicit encoding so the output does not depend on the platform default.
    with open(dest_dir/dest_name, 'w', encoding='utf-8') as f: f.write(html)
In [None]:
# export
def convert_all(path='.', dest_path='docs', force_all=False):
    "Convert all notebooks in `path` to html files in `dest_path`."
    path = Path(path)
    changed_cnt = 0
    # Sorted so that notebooks are always processed (and reported) in the same order.
    for fname in sorted(path.glob("*.ipynb")):
        # notebooks whose name starts with an underscore are ignored
        if fname.name.startswith('_'): continue
        fname_out = Path(dest_path)/'.'.join(fname.with_suffix('.html').name.split('_')[1:])
        # only rebuild modified files (unless `force_all`)
        if not force_all and fname_out.exists() and os.path.getmtime(fname) < os.path.getmtime(fname_out):
            continue
        print(f"converting: {fname} => {fname_out}")
        changed_cnt += 1
        try: convert_nb(fname, dest_path=dest_path)
        # Keep going with the other notebooks, but say what went wrong and let
        # `KeyboardInterrupt`/`SystemExit` through.
        except Exception as e: print(f"Failed: {type(e).__name__}: {e}")
    if changed_cnt==0: print("No notebooks were modified")

In [None]:
#hide
# Rebuild in `docs` the html of every notebook of the current folder that is newer than its html file.
convert_all()

converting: 04_data_core.ipynb => docs/data.core.html
converting: 90_notebook_core.ipynb => docs/notebook.core.html
converting: 91_notebook_export.ipynb => docs/notebook.export.html
converting: 92_notebook_showdoc.ipynb => docs/notebook.showdoc.html
Failed
converting: 03_data_external.ipynb => docs/data.external.html
converting: 05_data_source.ipynb => docs/data.source.html
converting: 06_pets_tutorial.ipynb => docs/pets.tutorial.html
converting: 02_data_pipeline.ipynb => docs/data.pipeline.html
Failed
converting: 93_notebook_export2html.ipynb => docs/notebook.export2html.html


## Export-

In [None]:
#hide
# NOTE(review): presumably regenerates the library modules from the export cells of all
# notebooks (`all_fs=True`) — confirm against `fastai_local.notebook.export`.
notebook2script(all_fs=True)