In [None]:
# export
import json,fire,re,os,shutil,glob
from pathlib import Path
from fastai.gen_doc.nbdoc import show_doc
from typing import Union, Optional
from typeguard import typechecked
NoneType = type(None)

In [None]:
# default_exp export

## Type checking

Runtime type checking is handy, so let's make it easy!

In [None]:
#export
def chk(f):
    "Decorate `f` with typeguard's `typechecked`, passing `always=True`."
    # NOTE(review): in typeguard 2.x `always=True` keeps the checks active under `python -O`;
    # confirm against the installed typeguard version.
    checker = typechecked(always=True)
    return checker(f)

In [None]:
@chk
def test_chk(a:Optional[int]=None):
    "Dummy function whose only purpose is to exercise `chk`."
    pass

# Both the default `None` and a plain int satisfy the annotation, so neither call may raise.
test_chk()
test_chk(1)

## Reading a notebook

### Finding export cells

A jupyter notebook is a json file behind the scenes. We can just read it with the json module, which will return a nested dictionary of dictionaries/lists of dictionaries.

In [None]:
# Read the notebook through `Path.read_text` so the file handle is closed right away,
# and decode explicitly as UTF-8 instead of relying on the locale's default encoding.
test_nb = json.loads(Path('00_export.ipynb').read_text(encoding='utf-8'))

The root has four keys: `cells` contains the cells of the notebook, `metadata` some stuff around the version of python used to execute the notebook, `nbformat` and `nbformat_minor` the version of nbformat. 

In [None]:
# Top-level keys of the dictionary loaded from the notebook's JSON
test_nb.keys()

dict_keys(['cells', 'metadata', 'nbformat', 'nbformat_minor'])

In [None]:
# Notebook-level `metadata` entry
test_nb['metadata']

{'kernelspec': {'display_name': 'Python 3',
  'language': 'python',
  'name': 'python3'},
 'language_info': {'codemirror_mode': {'name': 'ipython', 'version': 3},
  'file_extension': '.py',
  'mimetype': 'text/x-python',
  'name': 'python',
  'nbconvert_exporter': 'python',
  'pygments_lexer': 'ipython3',
  'version': '3.7.1'}}

In [None]:
# nbformat version rendered as "<nbformat>.<nbformat_minor>"
f"{test_nb['nbformat']}.{test_nb['nbformat_minor']}"

'4.2'

The cells key then contains a list of cells. Each one is a new dictionary that contains entries like the type (code or markdown), the source (what is written in the cell) and the output (for code cells).

In [None]:
# First entry of the notebook's list of cells
test_nb['cells'][0]

{'cell_type': 'code',
 'execution_count': 1,
 'metadata': {},
 'outputs': [],
 'source': ['# export\n',
  'import json,fire,re,os,shutil\n',
  'from pathlib import Path']}

This function will be used to detect the different `# flag` comments (such as `# export`) at the beginning of code cells.

In [None]:
# export
def check_re_pattern(cell, pat):
    "Match `pat` (ignoring case) against the first source line of a code `cell`."
    # Only non-empty code cells are inspected; anything else yields `False`.
    if cell['cell_type'] == 'code' and len(cell['source']) > 0:
        first_line = cell['source'][0]
        # `re.match` gives back the match object, or `None` when `pat` does not match.
        return re.match(pat, first_line, flags=re.IGNORECASE)
    return False

Now we can check for the cells that need to be exported.

In [None]:
# export
def is_export(cell, default):
    "Return the module `cell` should be exported to (`default` if it names none), or `None` if it is not an export cell."
    # A bare `#export` / `#exports` flag targets the default module.
    bare = check_re_pattern(cell, r'^\s*#\s*exports?\s*$')
    if bare: return default
    # Otherwise look for a flag followed by an explicit module name.
    named = check_re_pattern(cell, r'^\s*#\s*exports?\s*(\S+)\s*$')
    if not named: return None
    # Dotted module names are turned into a relative file path.
    return named.group(1).replace('.', os.path.sep)

We check if the cell begins with an `#export` or `#exports` code, potentially with a module name where we want it exported. The default is given in a cell of the form `#default_exp bla` inside the notebook (usually at the top), though in this function, it needs to be passed (the final script will read the whole notebook to find it).

In [None]:
# `dict.copy()` is shallow, so the `source` list would still be shared with `test_nb`;
# copy the list as well so the edits below never touch the loaded notebook.
cell = dict(test_nb['cells'][0])
cell['source'] = list(cell['source'])
assert is_export(cell, 'export') == 'export'
cell['source'][0] = "# exports"
assert is_export(cell, 'export') == 'export'
cell['source'][0] = "# export mod"
assert is_export(cell, 'export') == 'mod'
cell['source'][0] = "# export mod.file"
# `is_export` joins with `os.path.sep`, so build the expected value the same way.
assert is_export(cell, 'export') == os.path.join('mod', 'file')
cell['source'][0] = "# expt mod.file"
assert is_export(cell, 'export') is None

### Finding the default export path

In [None]:
# export
def find_default_export(cells):
    "Return the module name given by the first `#default_exp` cell in `cells`, or `None` if there is none."
    for cell in cells:
        tst = check_re_pattern(cell, r'^\s*#\s*default_exp\s*(\S*)\s*$')
        # `check_re_pattern` returns `False` for non-code or empty cells and `None` when the
        # pattern does not match, so test truthiness: `is not None` would let `False`
        # through and crash on `False.groups()`.
        if tst: return tst.groups()[0]
    return None

In [None]:
# Look up the `#default_exp` module name among the test notebook's cells
find_default_export(test_nb['cells'])

'export'

### Exporting one notebook

In [None]:
#export 
def _notebook2script(fname):
    """Finds cells starting with `#export` and puts them into a new module.

    The module path comes from the notebook's `#default_exp` cell (dots become
    directory separators) and the file is written under `fastai_local/` in the
    current working directory. The first line of every exported cell (the
    `#export` flag) is dropped and trailing spaces are stripped.

    Raises `ValueError` if the notebook has no usable `#default_exp` cell.
    """
    fname = Path(fname)
    # Notebook files are UTF-8 JSON; `read_text` also closes the file handle.
    nb = json.loads(fname.read_text(encoding='utf-8'))
    default = find_default_export(nb['cells'])
    if not default: raise ValueError(f"No `#default_exp <module>` cell found in {fname}")
    default = os.path.sep.join(default.split('.'))
    # The target is anchored at the current working directory, not at the notebook's folder.
    output_path = Path.cwd()/'fastai_local'/f'{default}.py'
    # NOTE(review): cells flagged `#export some.module` are kept here as well and end up in
    # the default module; the per-cell target returned by `is_export` is not used.
    code_cells = [c for c in nb['cells'] if is_export(c, default) is not None]
    module = f'''
#################################################
### THIS FILE WAS AUTOGENERATED! DO NOT EDIT! ###
#################################################
# file to edit: dev/{fname.name}

'''
    for cell in code_cells: module += ''.join(cell['source'][1:]) + '\n\n'
    # remove trailing spaces
    module = re.sub(r' +$', '', module, flags=re.MULTILINE)
    # A dotted default module maps to a sub-directory that may not exist yet.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # `[:-2]` drops the blank-line separator appended after the last chunk.
    output_path.write_text(module[:-2], encoding='utf-8')
    print(f"Converted {fname} to {output_path}")

### Exporting more notebooks

In [None]:
def get_sorted_files(g: Union[bool,str], up_to=None):
    "Sorted list of the files selected by `g` in the current dir, optionally limited to names `<= up_to`."
    assert isinstance(g, (bool, str))
    # A string is a glob pattern, `True` selects every notebook, `False` selects nothing.
    if isinstance(g, str): found = glob.glob(g)
    elif g: found = glob.glob('*.ipynb')
    else: found = []
    # The warning is about the raw selection, before any `up_to` filtering.
    if not found: print('WARNING: No files found')
    if up_to is not None:
        limit = str(up_to)
        found = [name for name in found if str(name) <= limit]
    found.sort()
    return found

Note that `g` can either be `True` (for all notebook files in the directory) or a glob pattern (like `00_*.ipynb`). If `up_to` is passed, only the notebooks whose name sorts up to that point (inclusive) are kept.

In [None]:
def notebook2script(fname=None, allFiles=None, upTo=None):
    '''Finds cells starting with `#export` and puts them into a new module
       + fname: notebook to convert when `allFiles` is not set
       + allFiles: `True` to convert all notebooks in the folder, or a glob pattern selecting them
       + upTo: only convert files whose name compares <= `upTo` (enables `allFiles` when passed alone)

       Examples:
       notebook2script --allFiles=True   # Parse all files
       notebook2script --allFiles=nb*   # Parse all files starting with nb*
       notebook2script --upTo=10   # Parse all files with (name<='10')
       notebook2script --allFiles=*_*.ipynb --upTo=10   # Parse all files with an '_' and (name<='10')
    '''
    # initial checks
    if (allFiles is None) and (upTo is not None): allFiles=True # Enable allFiles if upTo is present
    if not allFiles:
        if fname is None:
            # Nothing to convert: report it and stop instead of passing `None` to the exporter.
            print('Should provide a file name')
            return
        _notebook2script(fname)
    else:
        print('Begin...')
        for f in get_sorted_files(allFiles, upTo): _notebook2script(f)
        print('...End')

In [None]:
#export 
def _notebook2script(fname):
    """Finds cells starting with `#export` and puts them into a new module.

    The module path comes from the notebook's `#default_exp` cell (dots become
    directory separators) and the file is written under `fastai_local/` in the
    current working directory. The first line of every exported cell (the
    `#export` flag) is dropped and trailing spaces are stripped.

    Raises `ValueError` if the notebook has no usable `#default_exp` cell.
    """
    # NOTE(review): this re-defines the `_notebook2script` from the "Exporting one notebook"
    # section and shadows it; only one of the two definitions should be kept.
    fname = Path(fname)
    # Notebook files are UTF-8 JSON; `read_text` also closes the file handle.
    nb = json.loads(fname.read_text(encoding='utf-8'))
    default = find_default_export(nb['cells'])
    if not default: raise ValueError(f"No `#default_exp <module>` cell found in {fname}")
    default = os.path.sep.join(default.split('.'))
    # The target is anchored at the current working directory, not at the notebook's folder.
    output_path = Path.cwd()/'fastai_local'/f'{default}.py'
    # NOTE(review): cells flagged `#export some.module` are kept here as well and end up in
    # the default module; the per-cell target returned by `is_export` is not used.
    code_cells = [c for c in nb['cells'] if is_export(c, default) is not None]
    module = f'''
#################################################
### THIS FILE WAS AUTOGENERATED! DO NOT EDIT! ###
#################################################
# file to edit: dev/{fname.name}

'''
    for cell in code_cells: module += ''.join(cell['source'][1:]) + '\n\n'
    # remove trailing spaces
    module = re.sub(r' +$', '', module, flags=re.MULTILINE)
    # A dotted default module maps to a sub-directory that may not exist yet.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # `[:-2]` drops the blank-line separator appended after the last chunk.
    output_path.write_text(module[:-2], encoding='utf-8')
    print(f"Converted {fname} to {output_path}")

In [None]:
# Export this notebook itself with the single-notebook exporter.
_notebook2script('00_export.ipynb')

Converted 00_export.ipynb to /home/ubuntu/fastai_docs/dev/fastai_local/export.py


In [None]:
if __name__ == '__main__':
    # Hand `notebook2script` to python-fire so its arguments can be given on the command line.
    fire.Fire(notebook2script)