In [None]:
# export
import json,fire,re,os,shutil,glob
from pathlib import Path
from fastai.gen_doc.nbdoc import show_doc
from typing import Union, Optional
from typeguard import typechecked
NoneType = type(None)

In [None]:
# default_exp export

## Reading a notebook

### What's a notebook?

A jupyter notebook is a json file behind the scenes. We can just read it with the json module, which will return a nested dictionary of dictionaries/lists of dictionaries.

In [None]:
# Use a context manager so the notebook file is closed as soon as it has been parsed;
# notebooks are UTF-8 JSON, so don't depend on the platform's default encoding.
with open('00_export.ipynb', encoding='utf-8') as f: test_nb = json.load(f)

The root has four keys: `cells` contains the cells of the notebook, `metadata` holds information about the kernel and the version of Python used to execute the notebook, and `nbformat` and `nbformat_minor` give the version of nbformat.

In [None]:
test_nb.keys()

dict_keys(['cells', 'metadata', 'nbformat', 'nbformat_minor'])

In [None]:
test_nb['metadata']

{'kernelspec': {'display_name': 'Python 3',
  'language': 'python',
  'name': 'python3'},
 'language_info': {'codemirror_mode': {'name': 'ipython', 'version': 3},
  'file_extension': '.py',
  'mimetype': 'text/x-python',
  'name': 'python',
  'nbconvert_exporter': 'python',
  'pygments_lexer': 'ipython3',
  'version': '3.7.1'}}

In [None]:
f"{test_nb['nbformat']}.{test_nb['nbformat_minor']}"

'4.2'

The cells key then contains a list of cells. Each one is a new dictionary that contains entries like the type (code or markdown), the source (what is written in the cell) and the output (for code cells).

In [None]:
test_nb['cells'][0]

{'cell_type': 'code',
 'execution_count': 1,
 'metadata': {},
 'outputs': [],
 'source': ['# export\n',
  'import json,fire,re,os,shutil,glob\n',
  'from pathlib import Path\n',
  'from fastai.gen_doc.nbdoc import show_doc\n',
  'from typing import Union, Optional\n',
  'from typeguard import typechecked\n',
  'NoneType = type(None)']}

### Finding patterns

In [None]:
# export
def check_re_pattern(cell, pat):
    "Match `pat` (ignoring case) against the first source line of a code `cell`."
    # Returns the `re` match object (or `None` when the pattern doesn't match);
    # non-code cells and cells without any source line yield `False`.
    if cell['cell_type'] == 'code':
        lines = cell['source']
        if len(lines) > 0:
            # `re.match` anchors at the start of the line, and only line 0 is inspected.
            return re.match(pat, lines[0], flags=re.IGNORECASE)
    return False

In [None]:
show_doc(check_re_pattern)

<h4 id="check_re_pattern" class="doc_header"><code>check_re_pattern</code><a class="source_link" data-toggle="collapse" data-target="#check_re_pattern-pytest" style="float:right; padding-right:10px">[test]</a></h4>

> <code>check_re_pattern</code>(**`cell`**, **`pat`**)

<div class="collapse" id="check_re_pattern-pytest"><div class="card card-body pytest_card"><a type="button" data-toggle="collapse" data-target="#check_re_pattern-pytest" class="close" aria-label="Close"><span aria-hidden="true">&times;</span></a><p>No tests found for <code>check_re_pattern</code>. To contribute a test please refer to <a href="/dev/test.html">this guide</a> and <a href="https://forums.fast.ai/t/improving-expanding-functional-tests/32929">this discussion</a>.</p></div></div>

Check if `cell` contains given `pat`.  

This function returns `None` if `pat` doesn't match the first line of `cell` (and `False` if `cell` isn't a code cell or is empty), and the `re` match object otherwise, in case you want to access the groups it matched.

In [None]:
cell = {'cell_type': 'code', 'source': ['# export\n', 'import fastai']}
assert check_re_pattern(cell, r'^#') is not None
assert check_re_pattern(cell, r'bla') is None
assert check_re_pattern(cell, r'^#\s*(\S*)\s*$').groups()[0] == 'export'
# Matching ignores case and is anchored at the start of the first line only.
assert check_re_pattern(cell, r'^#\s*EXPORT') is not None
assert check_re_pattern(cell, r'export') is None
assert check_re_pattern(cell, r'import') is None
# Markdown cells and empty code cells never match.
assert not check_re_pattern({'cell_type': 'markdown', 'source': ['# export']}, r'^#')
assert not check_re_pattern({'cell_type': 'code', 'source': []}, r'^#')

In [None]:
# export
def is_export(cell, default):
    """Check if `cell` is to be exported and returns the name of the module.

    A bare `#export`/`#exports` flag on the first line maps to `default`. A flag followed by a
    dotted name (e.g. `#export mod.file`) maps to that name with the dots replaced by
    `os.path.sep`. Any other cell returns `None`. When the flag is bare and `default` is `None`,
    a message is printed and `None` is returned.
    """
    if check_re_pattern(cell, r'^\s*#\s*exports?\s*$'):
        if default is None:
            # The flag can be the only line of the cell, so don't index line 1 blindly.
            rest = cell['source'][1:]
            first_code_line = rest[0] if rest else ''
            print(f"This cell doesn't have an export destination and was ignored:\n{first_code_line}")
        return default
    tst = check_re_pattern(cell, r'^\s*#\s*exports?\s*(\S+)\s*$')
    return os.path.sep.join(tst.groups()[0].split('.')) if tst else None

In [None]:
show_doc(is_export)

<h4 id="is_export" class="doc_header"><code>is_export</code><a class="source_link" data-toggle="collapse" data-target="#is_export-pytest" style="float:right; padding-right:10px">[test]</a></h4>

> <code>is_export</code>(**`cell`**, **`default`**)

<div class="collapse" id="is_export-pytest"><div class="card card-body pytest_card"><a type="button" data-toggle="collapse" data-target="#is_export-pytest" class="close" aria-label="Close"><span aria-hidden="true">&times;</span></a><p>No tests found for <code>is_export</code>. To contribute a test please refer to <a href="/dev/test.html">this guide</a> and <a href="https://forums.fast.ai/t/improving-expanding-functional-tests/32929">this discussion</a>.</p></div></div>

Check if `cell` is to be exported and returns the name of the module.  

The cells to export are marked with an `#export` or `#exports` code, potentially with a module name where we want it exported. The default is given in a cell of the form `#default_exp bla` inside the notebook (usually at the top), though in this function, it needs to be passed (the final script will read the whole notebook to find it).

In [None]:
# `dict.copy()` is shallow: copy the `source` list as well, otherwise editing the flag line
# below would rewrite the first cell of `test_nb` itself.
cell = dict(test_nb['cells'][0], source=list(test_nb['cells'][0]['source']))
assert is_export(cell, 'export') == 'export'
cell['source'][0] = "# exports"
assert is_export(cell, 'export') == 'export'
cell['source'][0] = "# export mod"
assert is_export(cell, 'export') == 'mod'
cell['source'][0] = "# export mod.file"
# Dotted names become paths using the platform separator.
assert is_export(cell, 'export') == os.path.join('mod', 'file')
cell['source'][0] = "# expt mod.file"
assert is_export(cell, 'export') is None

In [None]:
# export
def find_default_export(cells):
    "Return the module named by the first `#default_exp` flag in `cells` (`None` if there is none)."
    flag = r'^\s*#\s*default_exp\s*(\S*)\s*$'
    # Lazy scan: cells are tested one by one and the search stops at the first hit.
    hits = (check_re_pattern(c, flag) for c in cells)
    first = next((m for m in hits if m), None)
    return first.group(1) if first else None

In [None]:
show_doc(find_default_export)

<h4 id="find_default_export" class="doc_header"><code>find_default_export</code><a class="source_link" data-toggle="collapse" data-target="#find_default_export-pytest" style="float:right; padding-right:10px">[test]</a></h4>

> <code>find_default_export</code>(**`cells`**)

<div class="collapse" id="find_default_export-pytest"><div class="card card-body pytest_card"><a type="button" data-toggle="collapse" data-target="#find_default_export-pytest" class="close" aria-label="Close"><span aria-hidden="true">&times;</span></a><p>No tests found for <code>find_default_export</code>. To contribute a test please refer to <a href="/dev/test.html">this guide</a> and <a href="https://forums.fast.ai/t/improving-expanding-functional-tests/32929">this discussion</a>.</p></div></div>

Find in `cells` the default export module.  

Stops at the first cell containing a `#default_exp` code and returns the value that follows it. Returns `None` if no cell has that code.

In [None]:
# The `#default_exp` flag sits in the second cell of this notebook, so skipping the first two cells leaves none to find.
assert find_default_export(test_nb['cells']) == 'export'
assert find_default_export(test_nb['cells'][2:]) is None

### Exporting notebooks

We're now ready to export notebooks!

In [None]:
# export
def _create_mod_file(fname, nb_path):
    "Create a module file for `fname`."
    with open(fname, 'w') as f: 
        f.write(f"#AUTOGENERATED! DO NOT EDIT! File to edit: dev/{nb_path.name} (unless otherwise specified).")

In [None]:
#export 
def _notebook2script(fname):
    """Finds cells starting with `#export` and puts them into a new module

    The notebook `fname` is parsed as JSON. If it declares a default module (`#default_exp`), that
    module file under `<cwd>/fastai_local/` is recreated with the autogenerated header. Every
    exported cell is then appended (minus its flag line) to its target module; cells sent to a
    module other than the default are prefixed with a `#Comes from <notebook>.` comment.
    """
    fname = Path(fname)
    # Notebooks are UTF-8 JSON; the context manager closes the handle right after parsing.
    with open(fname, 'r', encoding='utf-8') as f: nb = json.load(f)
    default = find_default_export(nb['cells'])
    if default is not None:
        default = os.path.sep.join(default.split('.'))
        _create_mod_file(Path.cwd()/'fastai_local'/f'{default}.py', fname)
    exports = [is_export(c, default) for c in nb['cells']]
    cells = [(c,e) for (c,e) in zip(nb['cells'],exports) if e is not None]
    for (c,e) in cells:
        fname_out = Path.cwd()/'fastai_local'/f'{e}.py'
        # Only cells redirected to a non-default module get a provenance comment.
        orig = '' if e==default else f'#Comes from {fname.name}.\n'
        # Drop the first line (the `#export` flag) and keep the rest of the cell.
        code = '\n\n' + orig + ''.join(c['source'][1:])
        # remove trailing spaces
        code = re.sub(r' +$', '', code, flags=re.MULTILINE)
        # Python source files are UTF-8 by default, so write the module that way on every platform.
        with open(fname_out, 'a', encoding='utf-8') as f: f.write(code)
    print(f"Converted {fname}.")

In [None]:
#export 
def _get_sorted_files(all_fs: Union[bool,str], up_to=None):
    "Return the list of files corresponding to `g` in the current dir."
    if (all_fs==True): ret = glob.glob('*.ipynb') # Checks both that is bool type and that is True
    else: ret = glob.glob(all_fs) if isinstance(g,str) else []
    if len(ret)==0: print('WARNING: No files found')
    if up_to is not None: ret = [f for f in ret if str(f)<=str(up_to)]
    return sorted(ret)

In [None]:
#export 
def notebook2script(fname=None, all_fs:Optional[Union[bool,str]]=None, up_to=None):
    """Finds cells starting with `#export` and puts them into the appropriate module.

    - `fname`: the filename of one notebook to convert
    - `all_fs`: `True` to convert all notebook files in the folder, or a glob expression
    - `up_to`: restricts the notebooks selected by `all_fs` to names `<= str(up_to)`;
      when given without `all_fs`, all notebooks in the folder are considered

    At least one of the three arguments must be provided. When `all_fs` ends up set,
    it takes precedence over `fname`.
    """
    # `up_to` alone implies "all notebooks". Resolve this before validating the arguments,
    # otherwise `notebook2script(up_to=10)` trips the assertion below.
    if (all_fs is None) and (up_to is not None): all_fs=True
    assert fname or all_fs, "Pass a notebook `fname`, `all_fs` or `up_to`."
    fnames = _get_sorted_files(all_fs, up_to=up_to) if all_fs else [fname]
    for f in fnames: _notebook2script(f)

Finds cells starting with `#export` and puts them into the appropriate module.
* `fname`: the filename of one notebook to convert
* `all_fs`: `True` if you want to convert all notebook files in the folder or a glob expression
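* `up_to`: converts only the notebooks selected by the previous arg whose file name sorts at or before this value (names are compared as strings, so `up_to=10` keeps every file with `name<='10'`)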

Examples of use in console:
```
notebook2script 00_export.ipynb                 # Parse 00_export.ipynb
notebook2script --all_fs=True                   # Parse all files
notebook2script --all_fs=nb*                    # Parse all files starting with nb*
notebook2script --up_to=10                      # Parse all files with (name<='10')
notebook2script --all_fs=*_*.ipynb --up_to=10   # Parse all files with an '_' and (name<='10')
```

In [None]:
notebook2script('99_export.ipynb')

Converted 99_export.ipynb.


In [None]:
notebook2script(all_fs=True)

Converted 00_test.ipynb.
Converted 01_core.ipynb.
Converted 99_export.ipynb.
