# Imports

In [1]:
# export
from collections import namedtuple, defaultdict

In [2]:
# export
import os

In [3]:
# export
import re

# Helpers

In [4]:
def run_tests(cases, func):
    """Run `func` on every test case and print a report for each one.

    `cases` is a sequence of `(name, input, expected)` triples. A case passes when
    `func(input) == expected`. Failed comparisons and exceptions raised by `func`
    are printed and swallowed, so every case is always executed.
    """
    total = len(cases)
    for number, (name, case, expected) in enumerate(cases, start=1):
        print(f'({number} / {total}) TEST {name}:')
        try:
            res = func(case)
            assert res == expected, f'TEST FAILED WITH RESULT: {res}\nEXPECTED: {expected}'
        except Exception as err:
            # Report and keep going; re-raise here while debugging a single case.
            print(f'TEST FAILED WITH EXCEPTION:\n{err}\n')
        else:
            print('TEST RESULT: SUCCESS\n')
    print('--------------- ALL TESTS COMPLETED ---------------')

# Init

In [5]:
# export
from nbdev.imports import *

In [6]:
# Configure the project as 'nbdev-rewrite' (user 'flpeters') with notebooks in the current folder.
# NOTE(review): `create_config` is not defined in this file (presumably it comes from the
# `nbdev.imports` star import) -- confirm what it writes/loads before relying on it.
create_config('nbdev-rewrite', 'flpeters', nbs_path='.')

In [7]:
# Sanity-check the execution environment, unless the IN_TEST environment variable is set.
# NOTE(review): IN_NOTEBOOK / IN_COLAB / IN_IPYTHON are not defined in this file; presumably
# they are provided by the `nbdev.imports` star import -- confirm.
if not os.environ.get("IN_TEST", None):
    assert IN_NOTEBOOK
    assert not IN_COLAB
    assert IN_IPYTHON

# Notebook Loading

In [8]:
#export
def read_nb(fname):
    "Read the notebook in `fname`."
    nb_path = Path(fname)
    # Read the raw UTF-8 text first, then let nbformat parse it as a version-4 notebook.
    with open(nb_path, 'r', encoding='utf8') as nb_file:
        raw = nb_file.read()
    return nbformat.reads(raw, as_version=4)

In [9]:
# Load '00_export.ipynb' as the fixture notebook that the exploratory cells below inspect.
test_nb = read_nb('00_export.ipynb')

In [10]:
test_nb.keys()

dict_keys(['cells', 'metadata', 'nbformat', 'nbformat_minor'])

In [11]:
test_nb['metadata']

{'kernelspec': {'display_name': 'Python 3',
  'language': 'python',
  'name': 'python3'},
 'language_info': {'codemirror_mode': {'name': 'ipython', 'version': 3},
  'file_extension': '.py',
  'mimetype': 'text/x-python',
  'name': 'python',
  'nbconvert_exporter': 'python',
  'pygments_lexer': 'ipython3',
  'version': '3.7.3'},
 'toc': {'base_numbering': 1,
  'nav_menu': {},
  'number_sections': True,
  'sideBar': True,
  'skip_h1_title': False,
  'title_cell': 'Table of Contents',
  'title_sidebar': 'Contents',
  'toc_cell': False,
  'toc_position': {},
  'toc_section_display': True,
  'toc_window_display': False}}

In [12]:
f"{test_nb['nbformat']}.{test_nb['nbformat_minor']}"

'4.4'

In [13]:
test_nb['cells'][0]

{'cell_type': 'markdown', 'metadata': {}, 'source': '# Imports'}

In [14]:
len(test_nb['cells'])

55

# Keyword Comments

`detect_comments()` is used to find and extract all comments from a code block.  
Its main purpose is to avoid matching on "comments" that are actually just part of a string, and not real Python comments. One example would be:

    """
    # export
    """
A naive parser would see the literal "#" and match that statement. In reality, however, this code snippet is a string that might be, e.g., part of a test suite (which is how this bug was found in the first place), and is not really meant to be exported.

In [15]:
# export
def detect_comments(s:str, pure_comments_only:bool=True) -> list:
    """Collect the real Python comments contained in the source text `s`.

    A `#` only starts a comment when it is not inside a string literal, so text such as
    `'''\\n# export\\n'''` is not reported. Single-quoted, double-quoted and triple-quoted
    strings are tracked, and a backslash inside a string escapes the following character
    (so `'it\\'s'` is one complete string).

    The "inside a string" state is carried from one line to the next for every kind of
    quote, not just triple quotes. An opening quote that is never closed therefore hides
    everything after it.

    Args:
        s: Source code of a cell.
        pure_comments_only: If True, only lines consisting of nothing but whitespace and a
            comment are reported, and the complete line is returned. If False, trailing
            comments are reported as well, and only the text from the `#` onwards is returned.

    Returns:
        The list of detected comments, in order of appearance.
    """
    # TODO: should non-pure comments ever be allowed?
    # TODO: are locations needed?
    comments = []
    delim = ''  # delimiter of the string literal we are currently inside; '' means "not in a string"
    for line in s.splitlines():
        is_pure_comment = True
        j, n = 0, len(line)
        while j < n:
            char = line[j]
            # Any character other than whitespace / '#' means the line is not comment-only.
            if not (char.isspace() or char == '#'): is_pure_comment = False
            if delim:
                if char == '\\': j += 2  # escaped character (possibly a quote): skip it
                elif line.startswith(delim, j):
                    j += len(delim)
                    delim = ''
                else: j += 1
            elif char == '#':
                if not pure_comments_only: comments.append(line[j:])
                elif is_pure_comment: comments.append(line)
                break  # the rest of the line belongs to the comment
            elif char in '\'"':
                # Prefer the triple-quote delimiter so quotes inside the block don't end it.
                delim = char * 3 if line.startswith(char * 3, j) else char
                j += len(delim)
            else: j += 1
    return comments

In [16]:
# export
class KeywordParser:
    """Lazily built cache of regexes that find "keyword comments" such as `# export`.

    A parser for keyword `kw` matches a line made of optional whitespace, one or more `#`,
    optional whitespace and then `kw` (case-insensitive). Everything after the keyword up
    to the end of the line is captured as group 1 (the options).

    Parsers are created on first use via `parser[kw]` and stored in `self.parsers`.
    """
    def __init__(self, *init_keywords):
        "Pre-compile a parser for each of `init_keywords`."
        self.parsers = {}
        for kw in init_keywords: self.parsers[kw] = self._create_parser(kw)

    def _create_parser(self, keyword):
        "Compile the regex matching a comment line that starts with the literal `keyword`."
        # TODO: Tighten down the syntax
        # TODO: Should there be any whitespace allowed before special comments?
        # TODO: Should more than one "#" be allowed for special comments?
        # The keyword is escaped so it is always matched literally: without escaping,
        # re.VERBOSE would drop whitespace inside the keyword and regex metacharacters
        # (".", "+", "#", ...) would be interpreted as pattern syntax.
        pattern = fr"""
        ^              # start of line, since MULTILINE is passed
        \s*            # any amount of whitespace
        \#+\s*          # literal "#", then any amount of whitespace
        {re.escape(keyword)}(.*)  # keyword followed by arbitrary symbols (except new line)
        $              # end of line, since MULTILINE is passed
        """
        return re.compile(pattern, re.IGNORECASE | re.MULTILINE | re.VERBOSE)

    def __getitem__(self, key):
        "Return the compiled parser for `key`, creating and caching it on first access."
        try: return self.parsers[key]
        except KeyError:
            parser = self.parsers[key] = self._create_parser(key)
            return parser

    def search(self, key, text):
        "Search the pure comment lines of `text` for keyword `key`; return the match or None."
        return self[key].search('\n'.join(detect_comments(text)))

    def _search_remove(self, key, text):
        "Unfinished: meant to find the keyword comment and report where it is, for removal."
        print('WARNING: _search_remove() DOESN\'T WORK YET')
        # TODO: This function is supposed to remove the keyword comment from the input
        # TODO: detect_comments() has to be modified to allow for the positions to be returned
        # NOTE: detect_comments() currently returns a plain list of comments, so the
        # two-value unpacking below does not yield (comments, locations) yet.
        parser = self[key]
        text, locations = detect_comments(text)
        for comment, l in zip(text, locations):
            res = parser.search(comment)
            if res: return res, l

In [17]:
# export
# Parsed options of an export comment: the target to export to (None = not specified)
# and whether the cell is flagged as internal.
OptionsTuple = namedtuple('Options', ('export_target', 'internal'), defaults=(None, False))

In [18]:
# export
# Legacy option syntax: an optional leading "i", optional whitespace, then an optional
# target that starts with an alphanumeric character.
_re_legacy_options = re.compile(r'^(i)?\s*([a-zA-Z0-9]+\S*|)\s*$')
def legacy_parse_options(options:str) -> OptionsTuple:
    """Parse the text following the export keyword using the legacy syntax.

    Returns an `OptionsTuple` whose `internal` is True when the text starts with "i" and
    whose `export_target` is the captured target (None when empty). Returns None when
    `options` does not fit the legacy syntax at all.
    """
    match = _re_legacy_options.search(options)
    if match is None: return None
    internal_flag, target = match.groups()
    return OptionsTuple(export_target=target or None, internal=internal_flag is not None)

In [19]:
# export
def parse_options(options:str, legacy:bool=True) -> OptionsTuple:
    """Turn the option text of an export comment into an `OptionsTuple`.

    Missing, empty or whitespace-only `options` give the default tuple. Otherwise, when
    `legacy` is True, the legacy syntax is tried first. Anything it cannot handle raises
    NotImplementedError, because the new option syntax does not exist yet.
    """
    if options is None or not options.strip(): return OptionsTuple()
    parsed = legacy_parse_options(options) if legacy else None
    if parsed: return parsed
    # TODO: This.
    raise NotImplementedError('this branch of parse_options() is not implemented yet.')

In [20]:
# export
# Module-wide parser cache shared by the keyword helpers.
keyword_parser = KeywordParser()
def parse_export(source:str) -> (bool, OptionsTuple):
    """Check whether the cell `source` carries an export comment.

    Returns `(True, options)` when an export comment is found, `(False, None)` otherwise.
    """
    # TODO: maybe check for # hide as well to overwrite # export?
    match = keyword_parser.search('export', source)
    if not match: return (False, None)
    return (True, parse_options(match.group(1)))

In [21]:
# export
def find_exports(cells:list, default:str, code_only:bool=True) -> list:
    """Collect the cells marked for export together with their export options.

    Args:
        cells: Notebook cells; each needs `cell_type` and `source` attributes.
        default: Export target used for cells whose export comment names no target.
        code_only: If True, cells whose `cell_type` is not 'code' are skipped.

    Returns:
        A list of `(source, options)` tuples, one per exported cell, where
        `options.export_target` is always filled in.

    Raises:
        AssertionError: If a cell has no export target and `default` is empty/None.
    """
    # TODO: remove whitespace at end of lines (not done yet)
    exports = []
    for i, cell in enumerate(cells):
        if code_only and (cell.cell_type != 'code'): continue
        source = cell.source
        to_export, options = parse_export(source)
        if not to_export: continue
        if not options.export_target:
            # Raised explicitly (instead of `assert`) so the check survives `python -O`;
            # the exception type is kept for callers that already handle AssertionError.
            if not default:
                raise AssertionError(f"Cell nr.{i} doesn't have an export target, "
                                     f"and a default is not specified:\n{source}")
            options = options._replace(export_target=default)
        exports.append((source, options))
    return exports

In [22]:
test_nb['cells'][0].keys()

dict_keys(['cell_type', 'metadata', 'source'])

In [23]:
test_nb['cells'][0]

{'cell_type': 'markdown', 'metadata': {}, 'source': '# Imports'}

In [24]:
find_exports(test_nb['cells'], 'export', code_only=True)

[('# export\nfrom collections import namedtuple, defaultdict',
  Options(export_target='export', internal=False)),
 ('# export\nimport os', Options(export_target='export', internal=False)),
 ('# export\nimport re', Options(export_target='export', internal=False)),
 ('# export\nfrom nbdev.imports import *',
  Options(export_target='export', internal=False)),
 ('#export\ndef read_nb(fname):\n    "Read the notebook in `fname`."\n    with open(Path(fname),\'r\', encoding=\'utf8\') as f: return nbformat.reads(f.read(), as_version=4)',
  Options(export_target='export', internal=False)),
 ('# export\ndef detect_comments(s:str, pure_comments_only:bool=True):\n    # TODO: should non-pure comments ever be allowed?\n    # TODO: are locations needed?\n    in_str, str_enter = False, \'\'\n    comments = list()\n    # locations = list()\n    for i, line in enumerate(s.splitlines()):\n        is_pure_comment = True\n        for j, char in enumerate(line):\n            if not (char.isspace() or char =

## Tests

In [225]:
# Each case is (name, cell source, expected parse_export() result).
# The first four cases put "#export" inside a string literal, where it must not be detected;
# the remaining cases contain at least one real "#export" comment line.
test_strings = [
("trippe quote(''')", """'''
#export
'''""", (False, None)),
('tripple quote(""")', '''"""
#export
"""''', (False, None)),
('single quote(")', '"\
\n#export\n\
"', (False, None)),
("single quote(')", "'\
\n#export\n\
'", (False, None)),
("correct", """
#export
""", (True, OptionsTuple())),
("tricky case 1", """
'this is a string'
#export
'this also, but between is an actual comment'
""", (True, OptionsTuple())),
("tricky case 2", """
  a #export
'''
#export
'''
####export""", (True, OptionsTuple())),
]
run_tests(test_strings, parse_export)

(1 / 7) TEST trippe quote('''):
TEST RESULT: SUCCESS

(2 / 7) TEST tripple quote("""):
TEST RESULT: SUCCESS

(3 / 7) TEST single quote("):
TEST RESULT: SUCCESS

(4 / 7) TEST single quote('):
TEST RESULT: SUCCESS

(5 / 7) TEST correct:
TEST RESULT: SUCCESS

(6 / 7) TEST tricky case 1:
TEST RESULT: SUCCESS

(7 / 7) TEST tricky case 2:
TEST RESULT: SUCCESS

--------------- ALL TESTS COMPLETED ---------------


In [226]:
# Each case is (name, cell source, expected parse_export() result).
# The cases using the "-i" / "-s" flag syntax are expected to fail for now: parse_options()
# raises NotImplementedError for anything the legacy syntax cannot parse.
test_markup = [
('export', """
# export
""", (True, OptionsTuple())),
('comment layout', """
#export
""", (True, OptionsTuple())),
('export internal legacy', """
# exporti
""", (True, OptionsTuple(internal=True))),
('export internal', """
# export -i
""", (True, OptionsTuple(internal=True))),
('export show source', """
# export -s
""", (True, OptionsTuple())),
('export internal show', """
# export -i -s
""", (True, OptionsTuple(internal=True))),
('default empty', """

""", (False, None)),
('hide', """
# hide
""", (False, None)),
('multiple comments', """
# export
# hide
""", (True, OptionsTuple())),
('multi comment same line', """
# export hide
""", (True, OptionsTuple(export_target='hide'))),
('multiple comments default_exp', """
# export
# default_exp
""", (True, OptionsTuple())),
]
run_tests(test_markup, parse_export)

(1 / 11) TEST export:
TEST RESULT: SUCCESS

(2 / 11) TEST comment layout:
TEST RESULT: SUCCESS

(3 / 11) TEST export internal legacy:
TEST RESULT: SUCCESS

(4 / 11) TEST export internal:
TEST FAILED WITH EXCEPTION:
this branch of parse_options() is not implemented yet.

(5 / 11) TEST export show source:
TEST FAILED WITH EXCEPTION:
this branch of parse_options() is not implemented yet.

(6 / 11) TEST export internal show:
TEST FAILED WITH EXCEPTION:
this branch of parse_options() is not implemented yet.

(7 / 11) TEST default empty:
TEST RESULT: SUCCESS

(8 / 11) TEST hide:
TEST RESULT: SUCCESS

(9 / 11) TEST multiple comments:
TEST RESULT: SUCCESS

(10 / 11) TEST multi comment same line:
TEST RESULT: SUCCESS

(11 / 11) TEST multiple comments default_exp:
TEST RESULT: SUCCESS

--------------- ALL TESTS COMPLETED ---------------


# Names

In [25]:
# export
import ast
from ast import iter_fields, AST
import _ast
from pprint import pprint

In [26]:
# export
def print_tree(node):
    """Recursively print every leaf value reachable from `node`.

    Lists and tuples are walked element by element, objects with a `_fields` attribute
    (such as AST nodes) are walked field by field, and anything else is printed.
    """
    if isinstance(node, (list, tuple)):
        children = node
    elif hasattr(node, '_fields'):
        # Generator keeps the original interleaving of attribute access and recursion.
        children = (getattr(node, field) for field in node._fields)
    else:
        print(node)
        return
    for child in children:
        print_tree(child)

In [27]:
# export
class TestParser(ast.NodeVisitor):
    """Debugging visitor that prints which handler every visited AST node is routed to.

    Assignments, function definitions and class definitions print the bound name and do
    not descend into their children. All other nodes fall back to `generic_visit`.
    """
    def visit(self, node):
        "Print `<node type> -> <handler name>` and then call that handler."
        node_type = node.__class__.__name__
        handler = getattr(self, 'visit_' + node_type, self.generic_visit)
        print(f'{node_type} -> {handler.__name__}')
        return handler(node)

    def _default(self, node):
        "Print the node's `_attributes` and `_fields`, followed by a separator line."
        separator = '-' * 25
        print(f'attr:   {node._attributes}\nfields: {node._fields}\n{separator}')

    def visit_Assign(self, node):
        "Print the `id` of the first assignment target."
        first_target = node.targets[0]
        print(first_target.id)

    def visit_FunctionDef(self, node):
        "Print the function's name; decorators and body are not visited."
        print(node.name)

    def visit_ClassDef(self, node):
        "Print the class's name; the class body is not visited."
        print(node.name)

In [28]:
# export
def remove_private_names(names):
    "Return a copy of the set `names` without the entries that start with an underscore."
    private = [name for name in names if name.startswith('_')]
    return names.difference(private)

In [37]:
# export
def update_recursive(node, names=None):
    """Add every name bound by the assignment target(s) `node` to `names`, in place.

    Handles plain names, (nested) tuple/list unpacking, starred targets and Python
    lists/tuples of targets. Attribute and subscript targets (`obj.x = ...`, `d[k] = ...`)
    are skipped because they do not bind a new name.

    Args:
        node: An AST target node, or a list/tuple of such nodes.
        names: Set to update. A fresh set is created when omitted, so separate calls
            never share state.

    Returns:
        The updated set (the same object as `names` when one was passed).

    Raises:
        Exception: If `node` is of a type that cannot be resolved to a name.
    """
    if names is None: names = set()
    if isinstance(node, (list, tuple)):
        for x in node: update_recursive(x, names)
    elif isinstance(node, (_ast.List, _ast.Tuple)):
        for x in node.elts: update_recursive(x, names)
    elif isinstance(node, _ast.Name): names.add(node.id)
    # The starred value is itself a target, so resolve it like any other target.
    elif isinstance(node, _ast.Starred): update_recursive(node.value, names)
    elif isinstance(node, (_ast.Attribute, _ast.Subscript)): pass  # mutates an object, binds nothing
    else: raise Exception(f'Couldn\'t resolve {node} to name, unknown type')
    return names

In [30]:
# export
def parse_tree(tree):
    """Collect the public names defined by the top-level statements of `tree`.

    Covered are assignments, annotated assignments that have a value and a plain-name
    target (`x: int = 1`), function definitions (including `async def`) and class
    definitions. Names starting with an underscore are dropped from the result.

    Args:
        tree: A parsed module, i.e. an object with a `body` list of AST statements.

    Returns:
        The set of public names.
    """
    # TODO: find _all_ declarations (other statement kinds are still ignored)
    names = set()
    for node in tree.body:
        if isinstance(node, ast.Assign): update_recursive(node.targets, names)
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            names.add(node.name)
        elif isinstance(node, ast.AnnAssign):
            # A bare annotation (`x: int`) binds nothing; only count real assignments.
            if node.value is not None and isinstance(node.target, ast.Name):
                names.add(node.target.id)
    return remove_private_names(names)

In [31]:
# export
def find_names(code:str) -> set:
    """Parse the source `code` and return the set of public names it defines at top level.

    Raises whatever `ast.parse` raises (e.g. SyntaxError) when `code` is not valid Python.
    """
    return parse_tree(ast.parse(code))

## Tests

In [378]:
# Each case is (name, source code, expected set of names returned by find_names()).
# Covers plain, chained and unpacking assignments, starred targets and private names.
test_assignment = [
('Default Assignment', """
a = 1
b = a
a = 2
""", {'a', 'b'}),
('Tuple unpacking', """
a, b = (1, 2)
""", {'a', 'b'}),
('unpacking to tuples and lists', """
(a, b) = (1, 2)
[a, b] = (1, 2)
""", {'a', 'b'}),
('unpacking to tuples and lists x2', """
([a], (b)) = (1, 2)
[[a, ((b))]] = (1, 2)
""", {'a', 'b'}),
('Multiple assignments', """
a = b = 2
""", {'a', 'b'}),
('List Deconstruction', """
head, *tail = [1,2,3,4,5]
""", {'head', 'tail'}),
('Private Variables', """
_a = 1
""", set()),
]
run_tests(test_assignment, find_names)

(1 / 7) TEST Default Assignment:
TEST RESULT: SUCCESS

(2 / 7) TEST Tuple unpacking:
TEST RESULT: SUCCESS

(3 / 7) TEST unpacking to tuples and lists:
TEST RESULT: SUCCESS

(4 / 7) TEST unpacking to tuples and lists x2:
TEST RESULT: SUCCESS

(5 / 7) TEST Multiple assignments:
TEST RESULT: SUCCESS

(6 / 7) TEST List Deconstruction:
TEST RESULT: SUCCESS

(7 / 7) TEST Private Variables:
TEST RESULT: SUCCESS

--------------- ALL TESTS COMPLETED ---------------


In [379]:
# Each case is (name, source code, expected set of names returned by find_names()).
# Only the function name is expected; parameters, locals and decorators must not leak in.
test_funcdef = [
('Default function definition', """
def add(a, b):
    return a + b
""", {'add'}),
('Type Annotated function def', """
def calc(a:int, b:int) -> int:
    c:float = 2.0
    return (a + b) * c
""", {'calc'}),
('function decorators', """
@test1
@test2
def add(a, b):
    return a + b
""", {'add'}),
('@patch and more complex type annotations', """
@patch
def func (obj:(Class1, Class2), a:int)->int:
    pass
""", {'func'})
]
run_tests(test_funcdef, find_names)

(1 / 4) TEST Default function definition:
TEST RESULT: SUCCESS

(2 / 4) TEST Type Annotated function def:
TEST RESULT: SUCCESS

(3 / 4) TEST function decorators:
TEST RESULT: SUCCESS

(4 / 4) TEST @patch and more complex type annotations:
TEST RESULT: SUCCESS

--------------- ALL TESTS COMPLETED ---------------


In [380]:
# Each case is (name, source code, expected set of names returned by find_names()).
# Class definitions with and without parentheses should both yield the class name.
test_classdef = [
('Default class definition', """
class Abc:
    pass
""", {'Abc'}),
('Default class def 2', """
class Abc():
    pass
""", {'Abc'}),
]
run_tests(test_classdef, find_names)

(1 / 2) TEST Default class definition:
TEST RESULT: SUCCESS

(2 / 2) TEST Default class def 2:
TEST RESULT: SUCCESS

--------------- ALL TESTS COMPLETED ---------------


# Export

In [32]:
# export
class ExportCache:
    """Accumulates, per export target, the code to write and the names to make public.

    `self.exports` maps a target key to an `exports(export_code, export_names)` tuple
    holding a list of code strings and a set of names. Entries are created on first
    access, including plain reads through `cache[key]`.
    """
    def __init__(self, default_export=None):
        "Create an empty cache; if `default_export` is given, its entry is created up front."
        self.tupletype = namedtuple('exports', ('export_code', 'export_names'))
        self.exports = defaultdict(self._create_exp)
        # Plain indexing is enough to make the defaultdict create the entry.
        if default_export is not None:
            self[default_export]

    def _create_exp(self):
        "Build a fresh, empty entry (used as the defaultdict factory)."
        return self.tupletype(export_code=[], export_names=set())

    def __getitem__(self, key):
        "Return the entry for `key`, creating it if it doesn't exist yet."
        return self.exports[key]

    def add_names(self, key, names):
        "Add all `names` to the set of exported names of target `key`."
        entry = self[key]
        entry.export_names.update(names)

    def add_code(self, key, code):
        "Append the `code` string to the code list of target `key`."
        entry = self[key]
        entry.export_code.append(code)

In [33]:
# export
def find_default_export(cells:list) -> str:
    "Return the default export target. Placeholder: `cells` is ignored, result is always 'export'."
    # TODO: search through all cells to find the default_exp keyword and return its value.
    # TODO: syntax checking
    # TODO: maybe do some sanity checking
    default = 'export'
    return default

In [34]:
# export
def create_mod_file(orig_nbfname, targ_pyfname):
    "Placeholder that does nothing yet and returns None."
    # TODO: create the .py file in the correct folder, with a header saying where it was originally from
    return None

In [35]:
# export
def _notebook2script(cells=None, fname=None, silent=False, to_dict=False):
    """Convert a single notebook"""
    # Work in progress: only the `cells` path is usable, and passing `fname` raises
    # NotImplementedError. `silent` and `to_dict` are accepted but not used yet.
    # Returns the ExportCache holding code and public names per export target.
    if cells: print('WARNING: The Cells parameter is only used for testing purposes!')
    if fname is not None: raise NotImplementedError('WARNING: fname is a "must pass", but not yet')
    # TODO: load notebook content
    # TODO: load config
    default = find_default_export(cells)
    if default is None:
        print('WARNING: No default export file found! (should this crash, or see if each export has its own target?)')
    # else: maybe create the module file here, or at the bottom together with all the others
    #       create_mod_file(original_nbfile_path, target_pyfile_path) # flipped in original code
    export_cache = ExportCache(default)
    # TODO: load _nbdev file and create a spec from it (no idea why this is needed)
    for code, options in find_exports(cells, default):
        # code = clean_code(code)
        target = options.export_target
        # Internal cells contribute code but none of their names.
        if not options.internal: export_cache.add_names(target, find_names(code))
        export_cache.add_code(target, code)
    # TODO: write_to_export_files(export_cache, default)
    # TODO: add names to _nbdev index
    # TODO: write code cell to file
    # TODO: save _nbdev file
    return export_cache

In [38]:
# Run the conversion on the cells of the fixture notebook and keep the resulting export cache.
ec = _notebook2script(test_nb['cells'])



In [39]:
# Show, for every export target, the collected code followed by the collected names.
for key, exp in ec.exports.items():
    pprint(exp.export_code)
    print('')
    print(exp.export_names)

['# export\nfrom collections import namedtuple, defaultdict',
 '# export\nimport os',
 '# export\nimport re',
 '# export\nfrom nbdev.imports import *',
 '#export\n'
 'def read_nb(fname):\n'
 '    "Read the notebook in `fname`."\n'
 "    with open(Path(fname),'r', encoding='utf8') as f: return "
 'nbformat.reads(f.read(), as_version=4)',
 '# export\n'
 'def detect_comments(s:str, pure_comments_only:bool=True):\n'
 '    # TODO: should non-pure comments ever be allowed?\n'
 '    # TODO: are locations needed?\n'
 "    in_str, str_enter = False, ''\n"
 '    comments = list()\n'
 '    # locations = list()\n'
 '    for i, line in enumerate(s.splitlines()):\n'
 '        is_pure_comment = True\n'
 '        for j, char in enumerate(line):\n'
 "            if not (char.isspace() or char == '#'): is_pure_comment = False\n"
 '            if in_str:\n'
 '                if (char == str_enter): in_str = False\n'
 '            else:\n'
 "                if (char == '#'):\n"
 '                    if pu

In [40]:
#export 
def notebook2script(fname=None, silent=False, to_dict=False):
    "Convert notebooks matching `fname` to modules"
    # NOTE(review): each file is passed to _notebook2script() as its first positional
    # argument; confirm this matches that function's current signature.
    # don't export if running tests
    if os.environ.get('IN_TEST',0): return
    if fname is not None:
        files = glob.glob(fname)
    else:
        # No pattern given: reset the project-wide state, then take every notebook in
        # the notebooks folder whose file name doesn't start with an underscore.
        reset_nbdev_module()
        update_version()
        update_baseurl()
        files = [f for f in Config().nbs_path.glob('*.ipynb') if not f.name.startswith('_')]
    d = collections.defaultdict(list) if to_dict else None
    for f in sorted(files):
        d = _notebook2script(f, silent=silent, to_dict=d)
    if to_dict: return d
    add_init(Config().lib_path)