In [1]:
# export
from local.imports import *
from local.notebook.core import *
import nbformat,inspect
from nbformat.sign import NotebookNotary

In [2]:
# default_exp notebook.export
# default_cls_lvl 3

In [2]:
#export
def read_nb(fname):
    "Read the notebook in `fname`."
    with open(Path(fname),'r') as f: return nbformat.reads(f.read(), as_version=4)

In [3]:
test_nb = read_nb('notebook_export.ipynb')

In [7]:
test_nb['nbformat']

4

In [4]:
test_nb.keys()

dict_keys(['cells', 'metadata', 'nbformat', 'nbformat_minor'])

In [5]:
test_nb['metadata']

{'kernelspec': {'display_name': 'Python 3',
  'language': 'python',
  'name': 'python3'},
 'language_info': {'codemirror_mode': {'name': 'ipython', 'version': 3},
  'file_extension': '.py',
  'mimetype': 'text/x-python',
  'name': 'python',
  'nbconvert_exporter': 'python',
  'pygments_lexer': 'ipython3',
  'version': '3.7.4'}}

In [7]:
f"{test_nb['nbformat']}.{test_nb['nbformat_minor']}"

'4.2'

In [8]:
test_nb['cells'][0]

{'cell_type': 'code',
 'execution_count': 1,
 'metadata': {},
 'outputs': [],
 'source': '# export\nfrom local.imports import *\nfrom local.notebook.core import *\nimport nbformat,inspect\nfrom nbformat.sign import NotebookNotary'}

In [9]:
# export
def check_re(cell, pat, code_only=True):
    "Check if `cell` contains a line with regex `pat`"
    if code_only and cell['cell_type'] != 'code': return
    if isinstance(pat, str): pat = re.compile(pat, re.IGNORECASE | re.MULTILINE)
    return pat.search(cell['source'])

In [10]:
cell = test_nb['cells'][0].copy()
assert check_re(cell, '# export') is not None
assert check_re(cell, re.compile('# export')) is not None
assert check_re(cell, '# bla') is None
cell['cell_type'] = 'markdown'
assert check_re(cell, '# export') is None
assert check_re(cell, '# export', code_only=False) is not None

In [11]:
# export
_re_blank_export = re.compile(r"""
# Matches any line with #export or #exports without any module name:
^         # beginning of line (since re.MULTILINE is passed)
\s*       # any number of whitespace
\#\s*     # # then any number of whitespace
exports?  # export or exports
\s*       # any number of whitespace
$         # end of line (since re.MULTILINE is passed)
""", re.IGNORECASE | re.MULTILINE | re.VERBOSE)

In [12]:
# export
_re_mod_export = re.compile(r"""
# Matches any line with #export or #exports with a module name and catches it in group 1:
^         # beginning of line (since re.MULTILINE is passed)
\s*       # any number of whitespace
\#\s*     # # then any number of whitespace
exports?  # export or exports
\s*       # any number of whitespace
(\S+)     # catch a group with any non-whitespace chars
\s*       # any number of whitespace
$         # end of line (since re.MULTILINE is passed)
""", re.IGNORECASE | re.MULTILINE | re.VERBOSE)

In [13]:
# export
def is_export(cell, default):
    "Check if `cell` is to be exported and returns the name of the module."
    if check_re(cell, _re_blank_export):
        if default is None:
            print(f"This cell doesn't have an export destination and was ignored:\n{cell['source'][1]}")
        return default
    tst = check_re(cell, _re_mod_export)
    return os.path.sep.join(tst.groups()[0].split('.')) if tst else None

In [14]:
cell = test_nb['cells'][0].copy()
assert is_export(cell, 'export') == 'export'
cell['source'] = "# exports" 
assert is_export(cell, 'export') == 'export'
cell['source'] = "# export mod" 
assert is_export(cell, 'export') == 'mod'
cell['source'] = "# export mod.file" 
assert is_export(cell, 'export') == 'mod/file'
cell['source'] = "# expt mod.file"
assert is_export(cell, 'export') is None

In [15]:
# export
_re_default_exp = re.compile(r"""
# Matches any line with #default_exp with a module name and catches it in group 1:
^            # beginning of line (since re.MULTILINE is passed)
\s*          # any number of whitespace
\#\s*        # # then any number of whitespace
default_exp  # export or exports
\s*          # any number of whitespace
(\S+)        # catch a group with any non-whitespace chars
\s*          # any number of whitespace
$            # end of line (since re.MULTILINE is passed)
""", re.IGNORECASE | re.MULTILINE | re.VERBOSE)

In [16]:
# export
def find_default_export(cells):
    "Find in `cells` the default export module."
    for cell in cells:
        tst = check_re(cell, _re_default_exp)
        if tst: return tst.groups()[0]

In [17]:
find_default_export(test_nb['cells'])

'notebook.export'

In [18]:
assert find_default_export(test_nb['cells']) == 'notebook.export'
assert find_default_export(test_nb['cells'][2:]) is None

In [19]:
# export
def _create_mod_file(fname, nb_path):
    "Create a module file for `fname`."
    fname.parent.mkdir(parents=True, exist_ok=True)
    with open(fname, 'w') as f:
        f.write(f"#AUTOGENERATED! DO NOT EDIT! File to edit: dev/{nb_path.name} (unless otherwise specified).")
        f.write('\n\n__all__ = []')

In [20]:
#export
_re_patch_func = re.compile(r"""
# Catches any function decorated with @patch, its name in group 1 and the patched class in group 2
@patch       # At any place in the cell, something that begins with @patch
\s*def       # Any number of whitespace (including a new line probably) followed by def
\s+          # One whitespace or more
([^\(\s]*)   # Catch a group composed of anything but whitespace or an opening parenthesis (name of the function)
\s*\(        # Any number of whitespace followed by an opening parenthesis
[^:]*        # Any number of character different of : (the name of the first arg that is type-annotated)
:\s*         # A column followed by any number of whitespace
([^,\)\s]*)  # Catch a group composed of anything but a comma, a closing parenthesis or whitespace (name of the class)
\s*          # Any number of whitespace
(?:,|\))     # Non-catching group with either a comma or a closing parenthesis
""", re.VERBOSE)

In [21]:
#hide
tst = _re_patch_func.search("""
@patch
def func(obj:Class)""")
assert tst.groups() == ("func", "Class")
tst = _re_patch_func.search("""
@patch
def func (obj:Class, a)""")
assert tst.groups() == ("func", "Class")

In [22]:
#export
_re_class_func_def = re.compile(r"""
# Catches any 0-indented function or class definition with its name in group 1
^              # Beginning of a line (since re.MULTILINE is passed)
(?:def|class)  # Non-catching group for def or class
\s+            # One whitespace or more
([^\(\s]*)     # Catching group with any character except an opening parenthesis or a whitespace (name)
\s*            # Any number of whitespace
(?:\(|:)       # Non-catching group with either an opening parenthesis or a : (classes don't need ())
""", re.MULTILINE | re.VERBOSE)

In [23]:
#hide
assert _re_class_func_def.search("class Class:").groups() == ('Class',)
assert _re_class_func_def.search("def func(a, b):").groups() == ('func',)

In [24]:
#export
_re_obj_def = re.compile(r"""
# Catches any 0-indented object definition (bla = thing) with its name in group 1
^          # Beginning of a line (since re.MULTILINE is passed)
([^=\s]*)  # Catching group with any character except a whitespace or an equal sign
\s*=       # Any number of whitespace followed by an =
""", re.MULTILINE | re.VERBOSE)

In [25]:
#hide
assert _re_obj_def.search("a = 1").groups() == ('a',)
assert _re_obj_def.search("a=1").groups() == ('a',)

In [26]:
# export
def _not_private(n):
    for t in n.split('.'):
        if t.startswith('_'): return False
    return '\\' not in t and '^' not in t and '[' not in t

def export_names(code, func_only=False):
    "Find the names of the objects, functions or classes defined in `code` that are exported."
    #Format monkey-patches with @patch
    code = _re_patch_func.sub(r'def \2.\1() = ', code)
    names = _re_class_func_def.findall(code)
    if not func_only: names += _re_obj_def.findall(code)
    return [n for n in names if _not_private(n)]

In [27]:
assert export_names("def my_func(x):\n  pass\nclass MyClass():") == ["my_func", "MyClass"]
#Indented funcs are ignored (funcs inside a class)
assert export_names("  def my_func(x):\n  pass\nclass MyClass():") == ["MyClass"]
#Private funcs are ignored
assert export_names("def _my_func():\n  pass\nclass MyClass():") == ["MyClass"]
#trailing spaces
assert export_names("def my_func ():\n  pass\nclass MyClass():") == ["my_func", "MyClass"]
#class without parenthesis
assert export_names("def my_func ():\n  pass\nclass MyClass:") == ["my_func", "MyClass"]
#object and funcs
assert export_names("def my_func ():\n  pass\ndefault_bla=[]:") == ["my_func", "default_bla"]
assert export_names("def my_func ():\n  pass\ndefault_bla=[]:", func_only=True) == ["my_func"]
#Private objects are ignored
assert export_names("def my_func ():\n  pass\n_default_bla = []:") == ["my_func"]
#Objects with dots are privates if one part is private
assert export_names("def my_func ():\n  pass\ndefault.bla = []:") == ["my_func", "default.bla"]
assert export_names("def my_func ():\n  pass\ndefault._bla = []:") == ["my_func"]
#Monkey-path with @patch are properly renamed
assert export_names("@patch\ndef my_func(x:Class):\n  pass") == ["Class.my_func"]
assert export_names("@patch\ndef my_func(x:Class):\n  pass", func_only=True) == ["Class.my_func"]
assert export_names("some code\n@patch\ndef my_func(x:Class, y):\n  pass") == ["Class.my_func"]

In [28]:
#export
_re_all_def   = re.compile(r"""
# Catches a cell with defines \_all\_ = [\*\*] and get that \*\* in group 1
^_all_   #  Beginning of line (since re.MULTILINE is passed)
\s*=\s*  #  Any number of whitespace, =, any number of whitespace
\[       #  Opening [
([^\n\]]*) #  Catching group with anything except a ] or newline
\]       #  Closing ]
""", re.MULTILINE | re.VERBOSE)

#Same with __all__
_re__all__def = re.compile(r'^__all__\s*=\s*\[([^\]]*)\]', re.MULTILINE)

In [29]:
# export
def extra_add(code):
    "Catch adds to `__all__` required by a cell with `_all_=`"
    if _re_all_def.search(code):
        names = _re_all_def.search(code).groups()[0]
        names = re.sub('\s*,\s*', ',', names)
        names = names.replace('"', "'")
        code = _re_all_def.sub('', code)
        code = re.sub(r'([^\n]|^)\n*$', r'\1', code)
        return names.split(','),code
    return [],code

In [30]:
assert extra_add('_all_ = ["func", "func1", "func2"]') == (["'func'", "'func1'", "'func2'"],'')
assert extra_add('_all_ = ["func",   "func1" , "func2"]') ==  (["'func'", "'func1'", "'func2'"],'')
assert extra_add("_all_ = ['func','func1', 'func2']\n") ==  (["'func'", "'func1'", "'func2'"],'')
assert extra_add('code\n\n_all_ = ["func", "func1", "func2"]') == (["'func'", "'func1'", "'func2'"],'code')

In [31]:
#export
def _add2add(fname, names, line_width=120):
    if len(names) == 0: return
    with open(fname, 'r') as f: text = f.read()
    tw = TextWrapper(width=120, initial_indent='', subsequent_indent=' '*11, break_long_words=False)
    re_all = _re__all__def.search(text)
    start,end = re_all.start(),re_all.end()
    text_all = tw.wrap(f"{text[start:end-1]}{'' if text[end-2]=='[' else ', '}{', '.join(names)}]")
    with open(fname, 'w') as f: f.write(text[:start] + '\n'.join(text_all) + text[end:])

In [32]:
fname = 'test_add.txt'
with open(fname, 'w') as f: f.write("Bla\n__all__ = [my_file, MyClas]\nBli")
_add2add(fname, ['new_function'])
with open(fname, 'r') as f: 
    assert f.read() == "Bla\n__all__ = [my_file, MyClas, new_function]\nBli"
_add2add(fname, [f'new_function{i}' for i in range(10)])
with open(fname, 'r') as f: 
    assert f.read() == """Bla
__all__ = [my_file, MyClas, new_function, new_function0, new_function1, new_function2, new_function3, new_function4,
           new_function5, new_function6, new_function7, new_function8, new_function9]
Bli"""
os.remove(fname)

In [33]:
# export
def _relative_import(name, fname):
    mods = name.split('.')
    splits = str(fname).split(os.path.sep)
    if mods[0] not in splits: return name
    splits = splits[splits.index(mods[0]):]
    while len(mods)>0 and splits[0] == mods[0]: splits,mods = splits[1:],mods[1:]
    return '.' * (len(splits)) + '.'.join(mods)

In [34]:
assert _relative_import('local.core', Path('local')/'data.py') == '.core'
assert _relative_import('local.core', Path('local')/'vision'/'data.py') == '..core'
assert _relative_import('local.vision.transform', Path('local')/'vision'/'data.py') == '.transform'
assert _relative_import('local.notebook.core', Path('local')/'data'/'external.py') == '..notebook.core'
assert _relative_import('local.vision', Path('local')/'vision'/'learner.py') == '.'

In [35]:
#export
#Catches any from local.bla import something and catches local.bla in group 1, the imported thing(s) in group 2.
_re_import = re.compile(r'^(\s*)from (local.\S*) import (.*)$')

In [36]:
# export
def _deal_import(code_lines, fname):
    pat = re.compile(r'from (local.\S*) import (\S*)$')
    lines = []
    def _replace(m):
        sp,mod,obj = m.groups()
        return f"{sp}from {_relative_import(mod, fname)} import {obj}"
    for line in code_lines:
        line = re.sub('_'+'file_', '__'+'file__', line) #Need to break _file_ or that line will be treated
        lines.append(_re_import.sub(_replace,line))
    return lines

In [37]:
#hide
lines = ["from local.core import *", "nothing to see", "  from local.vision import bla1, bla2", "from local.vision import models"]
assert _deal_import(lines, Path('local')/'data.py') == [
    "from .core import *", "nothing to see", "  from .vision import bla1, bla2", "from .vision import models"
]

In [38]:
#hide
#Tricking jupyter notebook to have a __file__ attribute. All _file_ will be replaced by __file__
_file_ = Path('local').absolute()/'notebook'/'export.py'

In [39]:
#export
def _get_index():
    if not (Path(_file_).parent/'index.txt').exists(): return {}
    return json.load(open(Path(_file_).parent/'index.txt', 'r'))

def _save_index(index):
    fname = Path(_file_).parent/'index.txt'
    fname.parent.mkdir(parents=True, exist_ok=True)
    json.dump(index, open(fname, 'w'), indent=2)

def _reset_index():
    if (Path(_file_).parent/'index.txt').exists():
        os.remove(Path(_file_).parent/'index.txt')

In [40]:
#hide
ind,ind_bak = Path(_file_).parent/'index.txt',Path(_file_).parent/'index.bak'
if ind.exists(): shutil.move(ind, ind_bak)
assert _get_index() == {}
_save_index({'foo':'bar'})
assert _get_index() == {'foo':'bar'}
if ind_bak.exists(): shutil.move(ind_bak, ind)

In [41]:
#export
def _notebook2script(fname, silent=False, to_pkl=False):
    "Finds cells starting with `#export` and puts them into a new module"
    if os.environ.get('IN_TEST',0): return  # don't export if running tests
    fname = Path(fname)
    nb = read_nb(fname)
    default = find_default_export(nb['cells'])
    if default is not None:
        default = os.path.sep.join(default.split('.'))
        if not to_pkl: _create_mod_file(Path.cwd()/'local'/f'{default}.py', fname)
    index = _get_index()
    exports = [is_export(c, default) for c in nb['cells']]
    cells = [(i,c,e) for i,(c,e) in enumerate(zip(nb['cells'],exports)) if e is not None]
    for (i,c,e) in cells:
        fname_out = Path.cwd()/'local'/f'{e}.py'
        orig = ('#C' if e==default else f'#Comes from {fname.name}, c') + 'ell\n'
        code = '\n\n' + orig + '\n'.join(_deal_import(c['source'].split('\n')[1:], fname_out))
        # remove trailing spaces
        names = export_names(code)
        extra,code = extra_add(code)
        if not to_pkl: _add2add(fname_out, [f"'{f}'" for f in names if '.' not in f] + extra)
        index.update({f: fname.name for f in names})
        code = re.sub(r' +$', '', code, flags=re.MULTILINE)
        if code != '\n\n' + orig[:-1]:
            if to_pkl: _update_pkl(fname_out, (i, fname, code))
            else:
                with open(fname_out, 'a') as f: f.write(code)
    _save_index(index)
    if not silent: print(f"Converted {fname}.")

In [42]:
#export 
def _get_sorted_files(all_fs: Union[bool,str], up_to=None):
    "Return the list of files corresponding to `g` in the current dir."
    if (all_fs==True): ret = glob.glob('*.ipynb') # Checks both that is bool type and that is True
    else: ret = glob.glob(all_fs) if isinstance(g,str) else []
    if len(ret)==0: print('WARNING: No files found')
    ret = [f for f in ret if not f.startswith('_')]
    if up_to is not None: ret = [f for f in ret if str(f)<=str(up_to)]
    return sorted(ret)

In [43]:
_notebook2script('notebook_core.ipynb')

Converted notebook_core.ipynb.


In [44]:
#export 
def notebook2script(fname=None, all_fs=None, up_to=None, silent=False, to_pkl=False):
    "Convert `fname` or all the notebook satisfying `all_fs`."
    # initial checks
    if os.environ.get('IN_TEST',0): return  # don't export if running tests
    assert fname or all_fs
    if all_fs: _reset_index()
    if (all_fs is None) and (up_to is not None): all_fs=True # Enable allFiles if upTo is present
    fnames = _get_sorted_files(all_fs, up_to=up_to) if all_fs else [fname]
    [_notebook2script(f, silent=silent, to_pkl=to_pkl) for f in fnames]

In [45]:
#export 
def _get_property_name(p):
    "Get the name of property `p`"
    if hasattr(p, 'fget'):
        return p.fget.func.__qualname__ if hasattr(p.fget, 'func') else p.fget.__qualname__
    else: return next(iter(re.findall(r'\'(.*)\'', str(p)))).split('.')[-1]

def get_name(obj):
    "Get the name of `obj`"
    if hasattr(obj, '__name__'):       return obj.__name__
    elif getattr(obj, '_name', False): return obj._name
    elif hasattr(obj,'__origin__'):    return str(obj.__origin__).split('.')[-1] #for types
    elif type(obj)==property:          return _get_property_name(obj)
    else:                              return str(obj).split('.')[-1]

In [46]:
# export
def qual_name(obj):
    "Get the qualified name of `obj`"
    if hasattr(obj,'__qualname__'): return obj.__qualname__
    if inspect.ismethod(obj):       return f"{get_name(obj.__self__)}.{get_name(fn)}"
    return get_name(obj)

In [47]:
assert get_name(in_ipython) == 'in_ipython'
assert get_name(DocsTestClass.test) == 'test'
# assert get_name(Union[Tensor, float]) == 'Union'

In [48]:
#hide
class PropertyClass:
    p_lambda = property(lambda x: x)
    def some_getter(self): return 7
    p_getter = property(some_getter)

assert get_name(PropertyClass.p_lambda) == 'PropertyClass.<lambda>'
assert get_name(PropertyClass.p_getter) == 'PropertyClass.some_getter'
assert get_name(PropertyClass) == 'PropertyClass'

In [49]:
# export
def source_nb(func, is_name=None, return_all=False):
    "Return the name of the notebook where `func` was defined"
    is_name = is_name or isinstance(func, str)
    index = _get_index()
    name = func if is_name else qual_name(func)
    while len(name) > 0:
        if name in index: return (name,index[name]) if return_all else index[name]
        name = '.'.join(name.split('.')[:-1])

In [50]:
assert qual_name(DocsTestClass) == 'DocsTestClass'
assert qual_name(DocsTestClass.test) == 'DocsTestClass.test'

In [51]:
# export
_re_default_nb = re.compile(r'File to edit: dev/(\S+)\s+')
_re_cell = re.compile(r'^#Cell|^#Comes from\s+(\S+), cell')

In [52]:
#hide
notebook2script(all_fs=True)

Converted KDDCup99 DAGMM.ipynb.
Converted dagmm_main.ipynb.
Converted dagmm_model.ipynb.
This cell doesn't have an export destination and was ignored:
 
Converted dagmm_solver.ipynb.
This cell doesn't have an export destination and was ignored:
 
This cell doesn't have an export destination and was ignored:
 
This cell doesn't have an export destination and was ignored:
 
Converted dagmm_utils.ipynb.
Converted notebook_core.ipynb.
Converted notebook_export.ipynb.
