In [1]:
#hide
#default_exp docs

# nbprocess.docs
- Exporting a notebook to docs

In [2]:
#export
from nbprocess.read import *
from nbprocess.imports import *
from nbprocess.export import *
from nbprocess.sync import write_nb, nb2dict

from fastcore.script import *
from fastcore.imports import *
from fastcore.basics import *
from fastcore.xtras import *

import uuid
import tempfile

In [3]:
#hide
from fastcore.test import *
from pdb import set_trace
from importlib import reload
import shutil

from nbconvert import MarkdownExporter
import traitlets.config

__file__ = '../nbprocess/export.py'

In [4]:
#     c.TagRemovePreprocessor.remove_cell_tags = ("remove_cell", "hide")
#     c.TagRemovePreprocessor.remove_all_outputs_tags = ("remove_output", "remove_outputs", "hide_output", "hide_outputs")
#     c.TagRemovePreprocessor.remove_input_tags = ('remove_input', 'remove_inputs', "hide_input", "hide_inputs")
#     pp = [InjectMeta, WriteTitle, CleanMagics, BashIdentify, MetaflowTruncate,
#           MetaflowSelectSteps, UpdateTags, TagRemovePreprocessor, CleanFlags, CleanShowDoc, RmEmptyCode,
#           HideInputLines, Black, ImageSave, ImagePath, HTMLEscape]
#     c.MarkdownExporter.preprocessors = pp

In [5]:
#export
def rm_blank_proc(cell):
    "Set `cell.source` to `None` when the cell contains nothing but whitespace"
    stripped = cell.source.strip()
    if not stripped:
        cell.source = None

In [6]:
#export
@outp_proc('data', 'text/html')
def html_escape(cell, outp, item):
    "Flag `cell` as having HTML output and return `item` wrapped in an `html` fenced code block."
    # The metadata flag is set here; presumably the export template uses it to emit
    # the surrounding <HTMLOutputBlock> component -- TODO confirm against the template.
    cell.metadata.html_output = True
    fence_open, fence_close = '```html', '```'
    return '\n'.join([fence_open, f'{item}', fence_close])

In [7]:
def image_save(nb, resources=None):
    """Saves images stored as bytes in notebooks to disk.

    `resources` is a dict that may hold:
    - `'metadata'`: a dict with the notebook `'name'` and (optionally) its `'path'`
    - `'outputs'`: a mapping of output file name -> raw bytes

    Every entry of `'outputs'` is written to `<path>/_<name>_files/<key>`, and
    `resources['fmap']` is set to a mapping of each original key to that relative
    location. Nothing is written when the name or the outputs are missing.

    Returns the `(nb, resources)` pair; `nb` itself is not modified.
    """
    # Previously `resources` was read as an undefined name; accept it as an optional argument
    if resources is None: resources = {}
    meta = resources.get('metadata', {})
    nb_name = meta.get('name')
    # A missing/empty path means "current directory" (`Path(None)` would raise TypeError)
    nb_path = meta.get('path') or '.'
    outfiles = resources.get('outputs')
    if nb_name and outfiles:
        fmap = resources['fmap'] = {}
        for k,v in outfiles.items():
            rel = f'_{nb_name}_files/{k}'
            dest = Path(nb_path)/rel
            # `parents=True` so that keys containing sub-directories can be written too
            dest.parent.mkdir(parents=True, exist_ok=True)
            dest.write_bytes(v)
            fmap[f'{k}'] = rel
    return nb, resources

In [8]:
def image_path(cell=None, resources=None):
    """Changes the image path to the location where `ImageSave` saved the files.

    For every output of `cell`, each value in `output['metadata']['filenames']`
    that appears as a key of `resources['fmap']` is replaced (in place) by the
    mapped location; values without a mapping are left unchanged.

    Returns the `(cell, resources)` pair.
    """
    # Previously `cell` and `resources` were read as undefined names; accept them as optional arguments
    if resources is None: resources = {}
    fmap = resources.get('fmap')
    if fmap and cell is not None:
        for o in cell.get('outputs', []):
            fnames = o.get('metadata', {}).get('filenames', {})
            # Only existing keys are reassigned, so mutating while iterating is safe here
            for k,v in fnames.items():
                fnames[k] = fmap.get(v,v)
    return cell, resources

In [9]:
#export
def _get_cell_id(id_length=36): return uuid.uuid4().hex[:id_length]

def _get_md_cell(content="<!--- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT!-->"):
    "Build a markdown cell (as an `AttrDict`) with a fresh id, empty metadata and `content` as its source"
    fields = dict(cell_type='markdown', id=f'{_get_cell_id()}', metadata={}, source=f'{content}')
    return AttrDict(fields)

def insert_warning(nb):
    "Insert Autogenerated Warning Into Notebook after the first cell."
    # Split around the first cell, then splice the warning cell in between
    first, rest = nb.cells[:1], nb.cells[1:]
    warning_cell = _get_md_cell()
    nb.cells = first + [warning_cell] + rest

In [10]:
#export
def write_md(nb_path, procs=None, post_procs=None, outp_procs=None, pre_procs=None, tpl_file='ob.tpl'):
    """Process the notebook at `nb_path`, convert it to markdown and write the result to disk.

    Steps, in order:
    1. An `NBProcessor` is created for `nb_path` with `procs` and `outp_procs`.
    2. Each callable in `pre_procs` is called with the notebook.
    3. The processor is run.
    4. Each callable in `post_procs` is called with the notebook.
    5. The notebook is rendered with nbconvert's `MarkdownExporter`, using the template
       `templates/<tpl_file>` located next to this module.

    The markdown is written to `../docusaurus/docs/<notebook stem>.md` relative to this
    module's directory (the directory is created if needed). Returns the destination `Path`.
    """
    # Imported locally so this exported function does not rely on imports that
    # only exist in a non-exported (`#hide`) notebook cell.
    import traitlets.config
    from nbconvert import MarkdownExporter

    nbp = NBProcessor(nb_path, procs, outp_procs=outp_procs)
    nb = nbp.nb
    for proc in L(pre_procs): proc(nb)
    nbp.process()
    for proc in L(post_procs): proc(nb)

    c = traitlets.config.Config()
    base = Path(__file__).parent.resolve()
    c.MarkdownExporter.template_file = str(base/'templates'/tpl_file)
    exp = MarkdownExporter(config=c)

    # Round-trip the processed notebook through a temporary file so the exporter can read it back
    with tempfile.TemporaryFile('a+') as tmp:
        write_nb(nb, tmp)
        tmp.seek(0)
        md,_ = exp.from_file(tmp)

    dest = base/'../docusaurus/docs'/Path(nb_path).with_suffix('.md').name
    # Make sure the output directory exists instead of failing with FileNotFoundError
    dest.parent.mkdir(parents=True, exist_ok=True)
    dest.write_text(md)
    return dest

In [15]:
# Run the full markdown export on the test notebook with the processors defined above
path = Path('../tests/docs_test.ipynb')

procs = [rm_blank_proc]
outp_procs = [strip_ansi, html_escape]
post_procs = [insert_warning]

write_md(path, procs=procs, post_procs=post_procs, outp_procs=outp_procs)

Path('/home/jhoward/git/nbprocess/nbprocess/../docusaurus/docs/docs_test.md')

In [16]:
# Load the same test notebook (`path`) that was passed to `write_md`
nb = read_nb(path)

In [17]:
!cat ../docusaurus/docs/docs_test.md

## a title


some md


```python
import re
```


```python
print('\033[94mhello')
```

<CodeOutputBlock lang="python">

    hello


</CodeOutputBlock>


```python
1+1
```

<CodeOutputBlock lang="python">




    2



</CodeOutputBlock>


```python
%%html
<b>a test</b>
```
    
<HTMLOutputBlock >


```html
<b>a test</b>

```


</HTMLOutputBlock>


```python
import pandas as pd
pd.DataFrame(dict(a=[1,2]))
```
    
<HTMLOutputBlock >

    hi





```html
<div>
<style scoped>
    .dataframe tbody tr th:only-of-type {
        vertical-align: middle;
    }

    .dataframe tbody tr th {
        vertical-align: top;
    }

    .dataframe thead th {
        text-align: right;
    }
</style>
<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>a</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>0</th>
      <td>1</td>
    </tr>
    <tr>
   

In [14]:
import nbformat

In [20]:
# Read the raw notebook without version conversion; the context manager
# closes the file handle, which was previously left open.
with path.open() as f:
    r = nbformat.read(f, nbformat.NO_CONVERT)

In [21]:
r

{'cells': [{'cell_type': 'markdown',
   'id': 'd3df6e7b',
   'metadata': {},
   'source': '## a title'},
  {'cell_type': 'markdown',
   'id': '7746fc7d',
   'metadata': {},
   'source': 'some md'},
  {'cell_type': 'code',
   'execution_count': 13,
   'id': '65751e43',
   'metadata': {},
   'outputs': [],
   'source': 'import re'},
  {'cell_type': 'code',
   'execution_count': 11,
   'id': 'f791a06e',
   'metadata': {},
   'outputs': [{'name': 'stdout',
     'output_type': 'stream',
     'text': '\x1b[94mhello\n'}],
   'source': "print('\\033[94mhello')"},
  {'cell_type': 'code',
   'execution_count': 1,
   'id': '54fe0b87',
   'metadata': {},
   'outputs': [{'data': {'text/plain': '2'},
     'execution_count': 1,
     'metadata': {},
     'output_type': 'execute_result'}],
   'source': '1+1'},
  {'cell_type': 'code',
   'execution_count': 2,
   'id': '70d3c3b6',
   'metadata': {},
   'outputs': [{'data': {'text/html': '<b>a test</b>\n',
      'text/plain': '<IPython.core.display.HTML o

## Export -

In [1]:
#skip
# NOTE(review): presumably regenerates the library modules from the notebooks' `#export` cells -- confirm
from nbprocess.export import nbs_export
nbs_export()

In [4]:
#     res = ''
#     for cell in nb.cells:
#         src = cell.source
#         if cell.cell_type=='code': src = f"\n```{lang}\n{src}\n```\n"
#         res += src + '\n'
#         if 'outputs' in cell:
#             for outp in cell.outputs:
#                 d = outp.get('data', {})
#                 if 'text/html' in d:
#                     res += '\n'.join(d['text/html'])
#                 elif 'text/plain' in d:
#                     t = '\n'.join(d['text/plain'])
#                     res += f"```\n  {t}\n```\n"
#     return res