In [None]:
# export
from fastai_local.core import *
from fastai_local.test import *
import json,re,os,shutil,glob
from textwrap import TextWrapper
from typing import Iterable,Iterator,Generator,Callable,Sequence,List,Tuple,Union,Optional
from pathlib import Path

In [None]:
# default_exp export

## Reading a notebook

### What's a notebook?

A Jupyter notebook is a JSON file behind the scenes. We can read it with the `json` module, which returns a nested structure of dictionaries and lists.

In [None]:
# Parse the notebook file; the `with` block closes the file handle once the JSON is loaded.
with open('99_export.ipynb') as f: test_nb = json.load(f)

The root has four keys: `cells` contains the cells of the notebook, `metadata` holds information about the kernel (and so the version of Python) used to execute the notebook, and `nbformat` and `nbformat_minor` give the version of nbformat.

In [None]:
# Top-level keys of the parsed notebook.
test_nb.keys()

dict_keys(['cells', 'metadata', 'nbformat', 'nbformat_minor'])

In [None]:
# Notebook-level metadata.
test_nb['metadata']

{'kernelspec': {'display_name': 'Python 3',
  'language': 'python',
  'name': 'python3'}}

In [None]:
# Notebook format version, rendered as "major.minor".
f"{test_nb['nbformat']}.{test_nb['nbformat_minor']}"

'4.2'

The `cells` key then contains a list of cells. Each one is a dictionary with entries such as `cell_type` (code or markdown), `source` (what is written in the cell, as a list of lines) and `outputs` (for code cells).

In [None]:
# First cell of the notebook, as stored in the JSON.
test_nb['cells'][0]

{'cell_type': 'code',
 'execution_count': None,
 'metadata': {},
 'outputs': [],
 'source': ['# export\n',
  'from fastai_local.core import *\n',
  'from fastai_local.test import *\n',
  'import json,re,os,shutil,glob\n',
  'from textwrap import TextWrapper\n',
  'from typing import Iterable,Iterator,Generator,Callable,Sequence,List,Tuple,Union,Optional\n',
  'from pathlib import Path']}

### Finding patterns

In [None]:
# export
def _check_re(cell, pat):
    if cell['cell_type'] != 'code': return False
    src = cell['source']
    if len(src) == 0: return False
    return re.match(pat, src[0], re.IGNORECASE)

In [None]:
# export
def is_export(cell, default):
    "Check if `cell` is to be exported and returns the name of the module."
    # Bare `#export`/`#exports` flag: fall back on the notebook-wide `default` module.
    if _check_re(cell, r'^\s*#\s*exports?\s*$'):
        if default is None:
            # A cell holding nothing but the flag has no second line to show, so guard the lookup.
            src = cell['source']
            first_line = src[1] if len(src) > 1 else ''
            print(f"This cell doesn't have an export destination and was ignored:\n{first_line}")
        return default
    # Flag followed by a dotted module name: `mod.file` becomes the relative path `mod<sep>file`.
    tst = _check_re(cell, r'^\s*#\s*exports?\s*(\S+)\s*$')
    return os.path.sep.join(tst.group(1).split('.')) if tst else None

The cells to export are marked with an `#export` or `#exports` code, potentially followed by the name of the module where we want it exported. The default is given in a cell of the form `#default_exp bla` inside the notebook (usually at the top), though in this function, it needs to be passed (the final script will read the whole notebook to find it).

In [None]:
# Copy the cell *and* its source list, so that editing the flag line below leaves `test_nb` untouched.
cell = dict(test_nb['cells'][0], source=list(test_nb['cells'][0]['source']))
test_eq(is_export(cell, 'export'), 'export')
cell['source'][0] = "# exports"
test_eq(is_export(cell, 'export'), 'export')
cell['source'][0] = "# export mod"
test_eq(is_export(cell, 'export'), 'mod')
cell['source'][0] = "# export mod.file"
# `is_export` joins with the platform separator, so build the expected path the same way.
test_eq(is_export(cell, 'export'), os.path.sep.join(['mod', 'file']))
cell['source'][0] = "# expt mod.file"
assert is_export(cell, 'export') is None
cell['source'][0] = "# export"

In [None]:
# export
def find_default_export(cells):
    "Return the module named by the first `#default_exp` flag found in `cells` (`None` if there is none)."
    pat = r'^\s*#\s*default_exp\s*(\S*)\s*$'
    # Lazy generator: cells after the first flagged one are never inspected.
    found = (_check_re(c, pat) for c in cells)
    return next((m.groups()[0] for m in found if m), None)

Stops at the first cell containing a `#default_exp` code and returns the value that follows it. Returns `None` if no cell has that code.

In [None]:
# The notebook loaded in `test_nb` is expected to declare `export` as its default module.
test_eq(find_default_export(test_nb['cells']), 'export')
# Skipping the first two cells leaves no `#default_exp` flag to find.
assert find_default_export(test_nb['cells'][2:]) is None

### Exporting notebooks

We're now ready to export notebooks!

In [None]:
# export
def _create_mod_file(fname, nb_path):
    "Create a module file for `fname`."
    fname.parent.mkdir(parents=True, exist_ok=True)
    with open(fname, 'w') as f: 
        f.write(f"#AUTOGENERATED! DO NOT EDIT! File to edit: dev/{nb_path.name} (unless otherwise specified).")
        f.write('\n\n__all__ = []')

In [None]:
# export
def _func_class_names(code):
    names = re.findall(r'^(?:def|class)\s+([^\(\s]*)\s*\(', code, re.MULTILINE)
    return [n for n in names if not n.startswith('_')]

In [None]:
# Public top-level functions and classes are returned in order of appearance
test_eq(_func_class_names("def my_func(x)():\n  pass\nclass MyClass():"), ["my_func", "MyClass"])
# Indented funcs are ignored (funcs inside a class)
test_eq(_func_class_names("  def my_func(x)():\n  pass\nclass MyClass():"), ["MyClass"])
# Private funcs are ignored
test_eq(_func_class_names("def _my_func():\n  pass\nclass MyClass():"), ["MyClass"])
# Spaces between the name and the opening parenthesis are tolerated
test_eq(_func_class_names("def my_func ():\n  pass\nclass MyClass():"), ["my_func", "MyClass"])

In [None]:
#export
def _add2add(fname, names, line_width=120):
    if len(names) == 0: return
    with open(fname, 'r') as f: text = f.read()
    tw = TextWrapper(width=120, initial_indent='', subsequent_indent=' '*11, break_long_words=False)
    re_all = re.search(r'__all__\s*=\s*\[([^\]]*)\]', text)
    start,end = re_all.start(),re_all.end()
    text_all = tw.wrap(f"{text[start:end-1]}{'' if text[end-2]=='[' else ', '}{', '.join(names)}]")
    with open(fname, 'w') as f: f.write(text[:start] + '\n'.join(text_all) + text[end:])

In [None]:
# Scratch file with some text before and after an `__all__` list.
fname = 'test_add.txt'
with open(fname, 'w') as f: f.write("Bla\n__all__ = [my_file, MyClas]\nBli")
# A single name is appended inside the brackets; the surrounding text is left alone.
_add2add(fname, ['new_function'])
with open(fname, 'r') as f: 
    test_eq(f.read(), "Bla\n__all__ = [my_file, MyClas, new_function]\nBli")
# Enough names to overflow one line: the list is wrapped and the continuation line indented.
_add2add(fname, [f'new_function{i}' for i in range(10)])
with open(fname, 'r') as f: 
    test_eq(f.read(), """Bla
__all__ = [my_file, MyClas, new_function, new_function0, new_function1, new_function2, new_function3, new_function4,
           new_function5, new_function6, new_function7, new_function8, new_function9]
Bli""")
# Clean up the scratch file.
os.remove(fname)

In [None]:
# export
def _relative_import(name, fname):
    mods = name.split('.')
    splits = str(fname).split(os.path.sep)
    if mods[0] not in splits: return name
    splits = splits[splits.index(mods[0]):]
    while splits[0] == mods[0]: splits,mods = splits[1:],mods[1:]
    return '.' * (len(splits)-len(mods)+1) + '.'.join(mods)

In [None]:
# Sibling module in the same package
test_eq(_relative_import('fastai_local.core', Path('fastai_local')/'data.py'), '.core')
# Module one package level above the importing file
test_eq(_relative_import('fastai_local.core', Path('fastai_local')/'vision'/'data.py'), '..core')
# Sibling module inside a subpackage
test_eq(_relative_import('fastai_local.vision.transform', Path('fastai_local')/'vision'/'data.py'), '.transform')

In [None]:
# Pattern for `from fastai_local... import ...` lines: group 1 is the module, group 2 what is imported.
pat = re.compile(r'from (fastai_local.\S*) import (\S*)$')
# `$` also matches just before a trailing newline, so a source line ending in "\n" still matches.
m = re.match(pat, "from fastai_local.core import *\n")
m.groups()

('fastai_local.core', '*')

In [None]:
# export
def _deal_import(code_lines, fname):
    pat = re.compile(r'from (fastai_local.\S*) import (\S*)$')
    lines = []
    for line in code_lines:
        match = re.match(pat, line)
        if match: lines.append(f"from {_relative_import(match.groups()[0], fname)} import {match.groups()[1]}\n")
        else: lines.append(line)
    return lines                    

In [None]:
#export 
def _notebook2script(fname):
    "Finds cells starting with `#export` and puts them into a new module"
    fname = Path(fname)
    # Close the notebook file as soon as it has been parsed.
    with open(fname,'r') as f: nb = json.load(f)
    default = find_default_export(nb['cells'])
    if default is not None:
        # Dotted module name -> relative path; the default module file is (re)created from scratch.
        default = os.path.sep.join(default.split('.'))
        _create_mod_file(Path.cwd()/'fastai_local'/f'{default}.py', fname)
    exports = [is_export(c, default) for c in nb['cells']]
    cells = [(c,e) for (c,e) in zip(nb['cells'],exports) if e is not None]
    for (c,e) in cells:
        fname_out = Path.cwd()/'fastai_local'/f'{e}.py'
        # Cells sent to a module other than the default one are tagged with their notebook of origin.
        orig = '' if e==default else f'#Comes from {fname.name}.\n'
        # The first source line is the `#export` flag itself and is dropped.
        code = '\n\n' + orig + ''.join(_deal_import(c['source'][1:], fname_out))
        # Register the public names (as quoted strings) in the module's `__all__`.
        _add2add(fname_out, [f"'{n}'" for n in _func_class_names(code)])
        # remove trailing spaces
        code = re.sub(r' +$', '', code, flags=re.MULTILINE)
        with open(fname_out, 'a') as f: f.write(code)
    print(f"Converted {fname}.")

In [None]:
#export 
def _get_sorted_files(all_fs: Union[bool,str], up_to=None):
    "Return the list of files corresponding to `g` in the current dir."
    if (all_fs==True): ret = glob.glob('*.ipynb') # Checks both that is bool type and that is True
    else: ret = glob.glob(all_fs) if isinstance(g,str) else []
    if len(ret)==0: print('WARNING: No files found')
    if up_to is not None: ret = [f for f in ret if str(f)<=str(up_to)]
    return sorted(ret)

In [None]:
#export 
def notebook2script(fname=None, all_fs=None, up_to=None):
    "Convert notebook `fname`, or every notebook matching `all_fs` (optionally only those sorting at or before `up_to`)."
    # `up_to` on its own implies "all notebooks". Resolve this before validating the arguments,
    # otherwise `notebook2script(up_to=...)` would trip the assertion below.
    if (all_fs is None) and (up_to is not None): all_fs=True
    assert fname or all_fs, "Pass `fname`, `all_fs` or `up_to`."
    fnames = _get_sorted_files(all_fs, up_to=up_to) if all_fs else [fname]
    for f in fnames: _notebook2script(f)

Finds cells starting with `#export` and puts them into the appropriate module.
* `fname`: the filename of one notebook to convert
* `all_fs`: `True` if you want to convert all notebook files in the folder or a glob expression
* `up_to`: converts only the notebooks selected by the previous argument (all of them if it isn't given) whose file name sorts at or before this value, e.g. `10`

Examples of use in console:
```
notebook2script 00_export.ipynb                 # Parse 00_export.ipynb
notebook2script --all_fs=True                   # Parse all files
notebook2script --all_fs=nb*                    # Parse all files starting with nb*
notebook2script --up_to=10                      # Parse all files with (name<='10')
notebook2script --all_fs=*_*.ipynb --up_to=10   # Parse all files with an '_' and (name<='10')
```

In [None]:
# Convert a single notebook, given by name.
notebook2script('99_export.ipynb')

Converted 99_export.ipynb.


In [None]:
# Convert every `*.ipynb` file found in the current directory.
notebook2script(all_fs=True)

Converted 00_test.ipynb.
Converted 01_core.ipynb.
Converted 02_data_pipeline.ipynb.
Converted 03_data_source.ipynb.
Converted 99_export.ipynb.
