# Parsing Names

In [1]:
import ast
import _ast
from pprint import pprint

In [2]:
def print_tree(node):
    if isinstance(node, (list, tuple)):
        for x in node:
            print_tree(x)
    elif hasattr(node, '_fields'):
        for f in node._fields:
            # print(f)
            print_tree(node.__getattribute__(f))
    else:
        print(node)
        # pass

In [3]:
def run_tests(cases):
    tp = TestParser()
    for i, (n, c, r) in enumerate(cases):
        print(f'({i + 1} / {len(cases)}) TEST {n}:')
        try:
            tree = ast.parse(c)
            res = tp.visit(tree)
            if res == r:
                print(f'TEST RESULT: SUCCESS\n')
            else:
                raise Exception(f'TEST FAILED WITH RESULT: {res}\nEXPECTED: {r}')
        except Exception as e:
            print(f'TEST RESULT:\n{e}\n')
            # raise e
    print('--------------- ALL TESTS COMPLETED ---------------')

In [4]:
class TestParser(ast.NodeVisitor):
    def visit(self, node):
        """Visit a node."""
        method = 'visit_' + node.__class__.__name__
        visitor = getattr(self, method, self.generic_visit)
        print(f'{node.__class__.__name__} -> {visitor.__name__}')
        return visitor(node)
    
    def _default(self, node):
        # pprint(node.__dict__)
        print(f'attr:   {node._attributes}')
        print(f'fields: {node._fields}')
        print('-'*25)
        
    def visit_Assign(self, node):
        self._default(node)
    
    def visit_FunctionDef(self, node):
        self._default(node)
        for d in node.decorator_list:
            print(self.visit(d))
            
    def visit_ClassDef(self, node):
        self._default(node)

## Tests

In [5]:
test_assignment = [
('Default Assignment', """
a = 1
b = a
a = 2
""", None),
('Tuple unpacking', """
a, b = (1, 2)
""", None),
('unpacking to tuples and lists', """
(a, b) = (1, 2)
[a, b] = (1, 2)
""", None),
('Multiple assignments', """
a = b = 2
""", None),
('List Deconstruction', """
head, *tail = [1,2,3,4,5]
""", None),
('Private Variables', """
_a = 1
""", None),
]
run_tests(test_assignment)

(1 / 6) TEST Default Assignment:
Module -> generic_visit
Assign -> visit_Assign
attr:   ('lineno', 'col_offset')
fields: ('targets', 'value')
-------------------------
Assign -> visit_Assign
attr:   ('lineno', 'col_offset')
fields: ('targets', 'value')
-------------------------
Assign -> visit_Assign
attr:   ('lineno', 'col_offset')
fields: ('targets', 'value')
-------------------------
TEST RESULT: SUCCESS

(2 / 6) TEST Tuple unpacking:
Module -> generic_visit
Assign -> visit_Assign
attr:   ('lineno', 'col_offset')
fields: ('targets', 'value')
-------------------------
TEST RESULT: SUCCESS

(3 / 6) TEST unpacking to tuples and lists:
Module -> generic_visit
Assign -> visit_Assign
attr:   ('lineno', 'col_offset')
fields: ('targets', 'value')
-------------------------
Assign -> visit_Assign
attr:   ('lineno', 'col_offset')
fields: ('targets', 'value')
-------------------------
TEST RESULT: SUCCESS

(4 / 6) TEST Multiple assignments:
Module -> generic_visit
Assign -> visit_Assign
attr:  

In [6]:
test_funcdef = [
('Default function definition', """
def add(a, b):
    return a + b
""", None),
('Type Annotated function def', """
def calc(a:int, b:int) -> int:
    c:float = 2.0
    return (a + b) * c
""", None),
('function decorators', """
@test1
@test2
def add(a, b):
    return a + b
""", None),
('@patch and more complex type annotations', """
@patch
def func (obj:(Class1, Class2), a:int)->int:
    pass
""", None)
]
run_tests(test_funcdef)

(1 / 4) TEST Default function definition:
Module -> generic_visit
FunctionDef -> visit_FunctionDef
attr:   ('lineno', 'col_offset')
fields: ('name', 'args', 'body', 'decorator_list', 'returns')
-------------------------
TEST RESULT: SUCCESS

(2 / 4) TEST Type Annotated function def:
Module -> generic_visit
FunctionDef -> visit_FunctionDef
attr:   ('lineno', 'col_offset')
fields: ('name', 'args', 'body', 'decorator_list', 'returns')
-------------------------
TEST RESULT: SUCCESS

(3 / 4) TEST function decorators:
Module -> generic_visit
FunctionDef -> visit_FunctionDef
attr:   ('lineno', 'col_offset')
fields: ('name', 'args', 'body', 'decorator_list', 'returns')
-------------------------
Name -> generic_visit
Load -> generic_visit
None
Name -> generic_visit
Load -> generic_visit
None
TEST RESULT: SUCCESS

(4 / 4) TEST @patch and more complex type annotations:
Module -> generic_visit
FunctionDef -> visit_FunctionDef
attr:   ('lineno', 'col_offset')
fields: ('name', 'args', 'body', 'decor

In [7]:
test_classdef = [
('Default class definition', """
class Abc:
    pass
""", None),
('Default class def 2', """
class Abc():
    pass
""", None),
]
run_tests(test_classdef)

(1 / 2) TEST Default class definition:
Module -> generic_visit
ClassDef -> visit_ClassDef
attr:   ('lineno', 'col_offset')
fields: ('name', 'bases', 'keywords', 'body', 'decorator_list')
-------------------------
TEST RESULT: SUCCESS

(2 / 2) TEST Default class def 2:
Module -> generic_visit
ClassDef -> visit_ClassDef
attr:   ('lineno', 'col_offset')
fields: ('name', 'bases', 'keywords', 'body', 'decorator_list')
-------------------------
TEST RESULT: SUCCESS

--------------- ALL TESTS COMPLETED ---------------


# Markup Comments

In [8]:
import re

In [9]:
def run_markup_tests(cases):
    for i, (n, c, r) in enumerate(cases):
        print(f'({i + 1} / {len(cases)}) TEST {n}:')
        try:
            res = parse_markup(c)
            assert res == r, f'TEST FAILED WITH RESULT: {res}\nEXPECTED: {r}'
            print(f'TEST RESULT: SUCCESS\n')
        except Exception as e:
            print(f'TEST FAILED WITH EXCEPTION:\n{e}\n')
            # raise e
    print('--------------- ALL TESTS COMPLETED ---------------')

## regex

In [10]:
def keywords_to_pattern(keywords):
    def _parse(kw, sw):
        return f'({kw}.*)'
#         if len(sw) == 0: return f'({kw})'
#         elif len(sw) == 1: return fr'({kw}(?:\s+-{sw[0]})*)'
#         else: return fr'({kw}(?:\s+-[{"|".join(sw)}])*)'
    
    if len(keywords) > 0:
        return '\n|'.join([_parse(kw, sw) for kw, [*sw] in keywords])

In [11]:
keywords = [('export', ['s', 'i', 'ss']),
            ('hide', []),
            ('default_exp', ['\S*'])]

In [12]:
pattern = fr"""
^       # start of line, since MULTILINE is passed
\s*     # any amount of whitespace
\#\s*   # literal "#", then any amount of whitespace
{keywords_to_pattern(keywords)}
\s*     # any amount of whitespace
$       # end of line, since MULTILINE is passed
"""
print(pattern)
_re_markup = re.compile(pattern, re.IGNORECASE | re.MULTILINE | re.VERBOSE)


^       # start of line, since MULTILINE is passed
\s*     # any amount of whitespace
\#\s*   # literal "#", then any amount of whitespace
(export.*)
|(hide.*)
|(default_exp.*)
\s*     # any amount of whitespace
$       # end of line, since MULTILINE is passed



In [13]:
_re_markup.search('# export').groups()

('export', None, None)

In [14]:
def parse_markup(cell):
    res = _re_markup.search(cell)
    if res: return res.groups()
    else: return None

## other version

In [15]:
def one_of(options): return '|'.join(options)

In [77]:
class KeywordParser:
    def __init__(self):
        self.parsers = {}
        
    
    def _create_parser(self, keyword):
        pattern = fr"""
        ^              # start of line, since MULTILINE is passed
        \s*            # any amount of whitespace
        \#\s*          # literal "#", then any amount of whitespace
        ({keyword}.*)  # keyword followed by arbitrary symbols (except new line)
        $              # end of line, since MULTILINE is passed
        """
        return re.compile(pattern, re.IGNORECASE | re.MULTILINE | re.VERBOSE)
        
    def __getitem__(self, key):
        if key in self.parsers: return self.parsers[key]
        else:
            parser = self._create_parser(key)
            self.parsers[key] = parser
            return parser

In [78]:
KWP = KeywordParser()

In [79]:
KWP['export']

re.compile(r'\n        ^              # start of line, since MULTILINE is passed\n        \s*            # any amount of whitespace\n        \#\s*          # literal "#", then any amount of whitespace\n        (export.*)  # keyword followed by arbitrary symbols (except new line)\n        $              # end of line, since MULTILINE is passed\n        ',
re.IGNORECASE|re.MULTILINE|re.UNICODE|re.VERBOSE)

In [80]:
KWP.parsers

{'export': re.compile(r'\n        ^              # start of line, since MULTILINE is passed\n        \s*            # any amount of whitespace\n        \#\s*          # literal "#", then any amount of whitespace\n        (export.*)  # keyword followed by arbitrary symbols (except new line)\n        $              # end of line, since MULTILINE is passed\n        ',
 re.IGNORECASE|re.MULTILINE|re.UNICODE|re.VERBOSE)}

In [None]:
mutually_exclusive_keywords = ['export', 'hide', 'show']

In [45]:
KWP = KeywordParser()
def parse_markup(cell):
    to_export = dict()
    res = _re_markup.search(cell)
    for (kw, sw) in keywords:
        res = KWP[kw].search(cell)
        
    if res:
        groups = res.groups()
        for (kw, sw), found in zip(keywords, groups):
            print(f'{kw} | {found} | {kw == found}')
            if bool(found):
                assert found.startswith(kw), f'keyword does not equal regex result: {kw} != {found}'
                #found = normalize_whitespace(found)
                #found = found.split()
                #options_for_cell = parse_keyword(found, kw, sw)
                
                #return options_for_cell
        return groups
    else: return None

In [None]:
def parse_markup(cells:list, KWP:KeywordParser) -> list:
    

## Tests

In [46]:
test_markup = [
('export', """
# export
""", (1, 1, 1, 0)),
('export internal', """
# export -i
""", (1, 0, 0, 0)),
('export show source', """
# export -s
""", (1, 1, 1, 1)),
('export internal show', """
# export -i -s
""", (1, 0, 1, 1)),
('default empty', """

""", None),
('hide', """
# hide
""", (0, 0, 0, 0)),
('layout 1', """
#export
""", (1, 1, 1, 0)),
('layout 2', """
 # export
""", (1, 1, 1, 0)),
('multiple comments', """
# export
# hide
""", None),
('multi comment same line', """
# export hide
""", None),
('multiple comments default_exp', """
# export
# default_exp
""", None),
]
run_markup_tests(test_markup)

(1 / 11) TEST export:
export | export | True
TEST FAILED WITH EXCEPTION:
TEST FAILED WITH RESULT: ('export',)
EXPECTED: (1, 1, 1, 0)

(2 / 11) TEST export internal:
export | export -i | False
TEST FAILED WITH EXCEPTION:
TEST FAILED WITH RESULT: ('export -i',)
EXPECTED: (1, 0, 0, 0)

(3 / 11) TEST export show source:
export | export -s | False
TEST FAILED WITH EXCEPTION:
TEST FAILED WITH RESULT: ('export -s',)
EXPECTED: (1, 1, 1, 1)

(4 / 11) TEST export internal show:
export | export -i -s | False
TEST FAILED WITH EXCEPTION:
TEST FAILED WITH RESULT: ('export -i -s',)
EXPECTED: (1, 0, 1, 1)

(5 / 11) TEST default empty:
TEST RESULT: SUCCESS

(6 / 11) TEST hide:
TEST FAILED WITH EXCEPTION:
TEST FAILED WITH RESULT: None
EXPECTED: (0, 0, 0, 0)

(7 / 11) TEST layout 1:
export | export | True
TEST FAILED WITH EXCEPTION:
TEST FAILED WITH RESULT: ('export',)
EXPECTED: (1, 1, 1, 0)

(8 / 11) TEST layout 2:
export | export | True
TEST FAILED WITH EXCEPTION:
TEST FAILED WITH RESULT: ('export',)
E

In [262]:
(0, 0, 0, 0), # -> # hide
(0, 0, 1, 0), # -> nothing (default)
(0, 0, 1, 1), # -> # show source
(1, 0, 0, 0), # -> # export internal
(1, 0, 1, 0), # -> # export internal show
(1, 0, 1, 1), # -> # export internal show source
(1, 1, 0, 0), # -> # export hide
(1, 1, 1, 0), # -> # export
(1, 1, 1, 1), # -> # export show source

((1, 1, 1, 1),)

# Export

In [155]:
from collections import namedtuple, defaultdict

In [217]:
class ExportCache:
    def __init__(self, default_export=None):
        self.tupletype = namedtuple(typename='exports', field_names=['export_code', 'export_names'])
        self.exports = defaultdict(self._create_exp)
        if default_export is not None: self[default_export]
    
    def _create_exp(self): return self.tupletype(export_code=list(), export_names=set())
    
    def __getitem__(self, key): return self.exports[key]
    
    def add_names(self, key, names):
        target = self[key].export_names
        for name in names: target.add(name)
            
    def add_code(self, key, code): self[key].export_code.append(code)

In [221]:
def find_default_export(cells:list) -> str:
    # search through all cells to find the default_exp keyword and return it's value.
    # syntax checking
    # maybe do some sanity checking
    pass

In [227]:
def create_mod_file(orig_nbfname, targ_pyfname):
    # create the .py file in the correct folder, with a header saying where it was originally from
    pass

In [222]:
def find_exports(cells:list, default:str) -> list:
    # check for each cell if it's supposed to be exported and aggregate cell content together with export options
    # remove whitespace at end of lines
    pass

In [226]:
def find_names(code:str) -> list:
    # find function and variable names in this code block
    # find _all_ declarations
    pass

In [228]:
def notebook2script(cells, fname=None, silent=False, to_dict=False):
    if cells: print('cells is only used for testing purposes!')
    if fname is not None: raise NotImplementedError('fname is a "must pass", but not yet')
    # load notebook content
    # load config
    default = find_default_export(cells)
    if default is None:
        print('WARNING: No default export file found! (should this crash, or see if each export has its own target?)')
    else:
        # maybe this should be done at the bottom, together with all the others
        # create_mod_file(original_nbfile_path, target_pyfile_path) # flipped in original code
        pass
    export_cache = ExportCache(default)
    # load _nbdev file and create a spec from it (no idea why this is needed)
    exports = find_exports(cells, default)
    for i, (code, internal, export_target)  in enumerate(exports):
        # code = clean_code(code)
        if not internal: export_cache.add_names(export_target, find_names(code))
        export_cache.add_code(export_target, code)
    write_to_export_files(export_cache, default)
    # add names to _nbdev index
    # write code cell to file
    # save _nbdev file
    pass