# Setup [setup.py]

In [None]:
# +default_exp -to_path ../setup.py -scoped -no_dunder_all

In [None]:
# +export
import sys
from pkg_resources import parse_version
from configparser import ConfigParser
from setuptools import setup, find_packages, __version__ as setuptools_version
assert parse_version(setuptools_version)>=parse_version('36.2')

In [None]:
# +export
CFG_NAMES = ['settings.ini'] # NOTE: Add alternative names/paths here
DEFAULT_SECTION = 'DEFAULT'

In [None]:
CFG_NAMES.append('../settings.ini')

In [None]:
# +export
# NOTE: Load Config
config = ConfigParser(default_section=DEFAULT_SECTION)
files_found = config.read(CFG_NAMES)
if   len(files_found) <= 0:
    raise FileNotFoundError(f'No Config file could not be found.\n\t\tExpected one of: {CFG_NAMES}')
elif len(files_found) >  1:
    raise ValueError(f'More than one Config file found: {files_found}')
file_found = files_found[0]

In [None]:
# +export
key_err = KeyError(f'{file_found} does not have a section titled {DEFAULT_SECTION}')
try: cfg = dict(config[DEFAULT_SECTION])
except KeyError: raise key_err from None
if len(cfg) == 0: raise key_err

In [None]:
# +export
# NOTE: Handle cmd line arguments
if len(sys.argv)>1 and sys.argv[1]=='version':
    if 'version' in cfg:
        print(cfg['version'])
        exit()
    else: raise KeyError(f"No 'version' is present in {file_found}")

In [None]:
# +export
# NOTE: Parse Config Metadata
# https://docs.python.org/3/distutils/setupscript.html
# https://packaging.python.org/specifications/

In [None]:
# +export
if not ('lib_name' in cfg): raise KeyError(f"Missing 'lib_name' in {file_found}")

In [None]:
# +export
metadata = dict(name = cfg.pop('lib_name'), # POP
                entry_points = dict(console_scripts = list()),
                install_requires = list(), # ['pip', 'packaging']
                extras_require = dict(),
                classifiers = list(),
                project_urls = dict(),
               )

In [None]:
metadata

{'name': 'nbdev_rewrite',
 'entry_points': {'console_scripts': []},
 'install_requires': [],
 'extras_require': {},
 'classifiers': [],
 'project_urls': {}}

In [None]:
# +export
# NOTE: license
# NOTE: classifiers, license
if 'license' in cfg:
    # https://spdx.org/licenses/
    # https://pypi.org/classifiers/
    # https://choosealicense.com/licenses/
    L = {'Apache-2.0' : ('Apache License 2.0',
                         'OSI Approved :: Apache Software License'),
         'MIT'        : ('MIT License',
                         'OSI Approved :: MIT License'),
         'GPL-3.0'    : ('GNU General Public License v3.0 only',
                         'OSI Approved :: GNU General Public License v3 (GPLv3)'),
         'Unlicense'  : ('The Unlicense',
                         'OSI Approved :: The Unlicense (Unlicense)'),
        }
    aliases = {'apache2'   : 'Apache-2.0',
               'mit'       : 'MIT',
               'gpl3'      : 'GPL-3.0',
               'unlicense' : 'Unlicense',
              }
    license = cfg['license'] # POP
    if license in L:
        L0, L1 = L[license]
    elif license in aliases:
        L0, L1 = L[aliases[license]]
    else:
        raise ValueError(f"License identifier '{license}' in '{file_found}' is not recognized.\n"\
                         f"\tAvailable identifiers are: {list(L.keys())}\n\tAliases: {aliases}.") from None
    metadata['license'] = L0
    metadata['classifiers'].append(f'License :: {L1}')

In [None]:
# +export
# NOTE: python_requires
# NOTE: classifiers: Programming Language
if 'min_python' in cfg:
    py_v = '2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7 3.0 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9'.split()
    min_python = cfg['min_python'] # POP
    assert '2.0' <= min_python, "A python version below '2.0' is not possible."
    if min_python not in py_v:
        print(f"[WARNING]: Minimum Python version '{min_python}' in '{file_found}' is not recognized.\n"\
              f"           Recognized versions are: {py_v}")
    metadata['python_requires'] = f'>={min_python}'
    
    metadata['classifiers'].append('Programming Language :: Python')
    if   '2.0' <= min_python < '3.0':
        metadata['classifiers'].append('Programming Language :: Python :: 2')
    elif '3.0' <= min_python < '4.0':
        metadata['classifiers'].append('Programming Language :: Python :: 3')
        metadata['classifiers'].append('Programming Language :: Python :: 3 :: Only')
    if min_python in py_v:
        metadata['classifiers'].extend([f'Programming Language :: Python :: {v}' for v in
                                        py_v[py_v.index(min_python):]])

In [None]:
# +export
# NOTE: classifiers: Development Status
if 'status' in cfg:
    statuses = ['1 - Planning', '2 - Pre-Alpha', '3 - Alpha', '4 - Beta',
                '5 - Production/Stable', '6 - Mature', '7 - Inactive' ]
    status = cfg['status'] # POP
    try:
        metadata['classifiers'].append(f'Development Status :: {statuses[int(status)]}')
    except (ValueError, IndexError):
        raise ValueError(f"Status '{status}' is an invalid value in '{file_found}'. \n"\
                         "It can only take on one of the following: {'1', '2', '3', '4', '5', '6', '7'}") from None

In [None]:
# +export
# NOTE: classifiers: Natural Language
if 'language' in cfg:
    metadata['classifiers'].append(f"Natural Language :: {cfg['language'].title()}") # POP

In [None]:
# +export
# NOTE: classifiers: Intended Audience
if 'audience' in cfg:
    # TODO: Support lists as well?
    #       There can be multiple indended audiences e.g. 'Developers' + 'Science/Research'
    metadata['classifiers'].append(f"Intended Audience :: {cfg['audience'].title()}") # POP

In [None]:
# +export
# TODO: Add 'Operating System' identifier as well?

In [None]:
# +export
# NOTE: install_requires
if 'requirements' in cfg:
    metadata['install_requires'].extend(cfg['requirements'].split()) # POP

In [None]:
# +export
# NOTE: extras_require
if 'dev_requirements' in cfg:
    metadata['extras_require']['dev'] = cfg['dev_requirements'].split() # POP

In [None]:
# +export
# NOTE: entry_points: console_scripts
if 'console_scripts' in cfg:
    metadata['entry_points']['console_scripts'].extend(cfg['console_scripts'].split()) # POP

In [None]:
# +export
# NOTE: long_description
if not 'long_description' in cfg: # NOTE: Allow for long_description to be overwritten in settings.ini
    try: metadata['long_description'] = open('README.md').read()
    except FileNotFoundError:
        if 'description' in cfg:
            metadata['long_description'] = cfg['description'] # NOT POP
        else: pass

In [None]:
# +export
# NOTE: url
# NOTE: download_url
# NOTE: project_urls: Source Code
if 'git_url' in cfg:
    git_url = cfg['git_url'] # POP
    if (not 'url' in cfg):
        metadata['url'] = git_url # homepage
    if (not 'download_url' in cfg):
        metadata['download_url'] = git_url # TODO: use pipy url
    if (not 'source_url' in cfg):
        metadata['project_urls']['Source Code'] = git_url

In [None]:
# +export
# NOTE: project_urls: Documentation
if 'doc_host' in cfg:
    doc_url = cfg['doc_host'] # POP
    if 'doc_baseurl' in cfg: doc_url += cfg['doc_baseurl'] # POP
    metadata['project_urls']['Documentation'] = doc_url

In [None]:
# +export
# NOTE: project_urls: Bug Tracker
if 'bug_tracker_url' in cfg: # issues
    metadata['project_urls']['Bug Tracker'] = cfg['bug_tracker_url'] # POP

In [None]:
# +export
setup_kwargs = {k: cfg[k] for k in ['lib_name', 'author', 'author_email', 'maintainer',
                                 'maintainer_email', 'version', 'description', 'keywords',]
                if k in cfg}

In [None]:
# +export
setup_kwargs.update(packages                      = find_packages(where='.'),
                    include_package_data          = True,
                    long_description_content_type = 'text/markdown',
                    zip_safe                      = False,
                   )

In [None]:
setup_kwargs

{'author': 'Florian Peters',
 'version': '0.0.1',
 'packages': [],
 'include_package_data': True,
 'long_description_content_type': 'text/markdown',
 'zip_safe': False}

In [None]:
# +export
setup_call = {**metadata, **setup_kwargs}

In [None]:
# +export -from_string
r"""
setup(**setup_call)
""";

In [None]:
setup_call

{'name': 'nbdev_rewrite',
 'entry_points': {'console_scripts': []},
 'install_requires': [],
 'extras_require': {},
 'classifiers': ['License :: OSI Approved :: Apache Software License',
  'Programming Language :: Python',
  'Programming Language :: Python :: 3',
  'Programming Language :: Python :: 3 :: Only',
  'Programming Language :: Python :: 3.7',
  'Programming Language :: Python :: 3.8',
  'Programming Language :: Python :: 3.9',
  'Development Status :: 3 - Alpha',
  'Natural Language :: English',
  'Intended Audience :: Developers'],
 'project_urls': {'Source Code': 'https://github.com/flpeters/nbdev_rewrite/tree/master/',
  'Documentation': 'https://flpeters.github.io/nbdev_rewrite/',
  'Bug Tracker': 'https://github.com/flpeters/nbdev_rewrite/issues'},
 'license': 'Apache License 2.0',
 'python_requires': '>=3.7',
 'url': 'https://github.com/flpeters/nbdev_rewrite/tree/master/',
 'download_url': 'https://github.com/flpeters/nbdev_rewrite/tree/master/',
 'author': 'Florian P

## Notes

unused args:
```python
nbs_path
lib_path
doc_path
title
dep_links # dependency_links
git_user
host
branch
repo_name
company_name
```
modified args:
```python
license
min_python
status
language
audience
requirements
dev_requirements
console_scripts
git_url
doc_host
doc_baseurl
bug_tracker_url
```
modified unlisted kwargs:
```python
long_description
url, download_url, source_url
dev_requirements
```
unmodified args:
```python
lib_name
author, author_email
maintainer, maintainer_email
version
description
keywords
```

other args:
```python
copyright
```



```python
import distutils
distutils.core.setup_keywords
'distclass',
'script_name',
'script_args',
'options',
    'name',
    'version',
    'author',
    'author_email',
    'maintainer',
    'maintainer_email',
    'url',
    'license',
    'description',
    'long_description',
    'keywords',
'platforms',
    'classifiers',
    'download_url',
'requires',
'provides',
'obsoletes'
```

```python
metadata = dict(
    # name='numpy',
    # maintainer="NumPy Developers",
    # maintainer_email="numpy-discussion@python.org",
    # description=DOCLINES[0],
    # long_description="\n".join(DOCLINES[2:]),
    # url="https://www.numpy.org",
    # author="Travis E. Oliphant et al.",
    # download_url="https://pypi.python.org/pypi/numpy",
    # project_urls={
    #     "Bug Tracker": "https://github.com/numpy/numpy/issues",
    #     "Documentation": get_docs_url(),
    #     "Source Code": "https://github.com/numpy/numpy",
    # },
    # license='BSD',
    # classifiers=[_f for _f in CLASSIFIERS.split('\n') if _f],
    platforms=["Windows", "Linux", "Solaris", "Mac OS-X", "Unix"],
    test_suite='pytest',
    version=versioneer.get_version(),
    cmdclass=cmdclass,
    # python_requires='>=3.7',
    zip_safe=False,
    # entry_points={
    #     'console_scripts': f2py_cmds
    # }
)
```

```python
setup(
    # name = cfg['lib_name'], # lib_name
    # license = lic[0], # license
    # classifiers = [
    #     'Development Status :: ' + statuses[int(cfg['status'])], # status
    #     'Intended Audience :: ' + cfg['audience'].title(), # audience
    #     'License :: ' + lic[1], # license
    #     'Natural Language :: ' + cfg['language'].title(), # language
    # ] + ['Programming Language :: Python :: '+o for o in py_versions[py_versions.index(min_python):]], # min_python
    # url = cfg['git_url'], # git_url
    packages = find_packages(),
    include_package_data = True,
    # install_requires = requirements, # requirements
    # extras_require={ 'dev': dev_requirements }, # dev_requirements
    # python_requires  = '>=' + cfg['min_python'], # min_python
    # long_description = long_description, # README.md
    long_description_content_type = 'text/markdown',
    zip_safe = False,
    # entry_points = { 'console_scripts': cfg.get('console_scripts','').split() }, # console_scripts
    **setup_cfg)
```

# Argument Parsing [Module]

In [None]:
# +default_exp -to argument_parsing -scoped

In [None]:
# +export
# This Flag allows anyone to know if this Module exists in their namespace
MODULE__ARGUMENT_PARSING__FLAG = None

## Logging

In [None]:
# +export -internal
arg_parse_REPORT_ERROR  :bool = True
arg_parse_REPORT_WARNING:bool = True
arg_parse_RAISE_ERROR  :bool  = False
arg_parse_RAISE_WARNING:bool  = False
arg_parse_SILENT:bool = False

In [None]:
# +export
def set_arg_parse_report_options(report_error:bool=True, report_warning:bool=True,
                                 raise_error:bool=False, raise_warning:bool=False,
                                 silent=False):
    "Set options for how the Argument Parsing Module will behave on encountering errors or warnings.\n"\
    "Raise causes an exception to be raised, and it supersedes report.\n"\
    "Report prints the information and then continues. If raise is set, then this setting is ignored."\
    "Silent overwrites all other settings and causes all errors and warnings to be ignored."\
    "The priority is thus: silent > raise > report"
    global arg_parse_REPORT_ERROR, arg_parse_REPORT_WARNING
    global arg_parse_RAISE_ERROR, arg_parse_RAISE_WARNING
    global arg_parse_SILENT
    arg_parse_REPORT_ERROR, arg_parse_REPORT_WARNING = report_error, report_warning
    arg_parse_RAISE_ERROR , arg_parse_RAISE_WARNING  = raise_error , raise_warning
    arg_parse_SILENT = (silent or not (report_error and report_warning and raise_error and raise_warning))

In [None]:
# +export -internal
def report_error(err:Exception):
    if   arg_parse_SILENT: pass
    elif arg_parse_RAISE_ERROR : raise err
    elif arg_parse_REPORT_ERROR: print(f'[{err.__class__.__name__}]: {err}')

In [None]:
# +export -internal
def report_warning(warn:str):
    if   arg_parse_SILENT: pass
    elif arg_parse_RAISE_WARNING : raise Warning(warn)
    elif arg_parse_REPORT_WARNING: print(f'[Warning]: {warn}')

## Next Argument

This is just a fancy way of advancing the cursor and checking for out of bounds.

In [None]:
# +export -internal
def get_next_argument(args:list, name:str, cursor:int, suppress_error:bool=False) -> (bool, int, str):
    "Gets the next argument from the list.\nReturns success, the cursor, and the next argument"
    cursor_1 = cursor + 1
    try: return True, cursor_1, args[cursor_1]
    except IndexError:
        if not suppress_error:
            report_error(SyntaxError(f"End of arguments reached. Missing a value for argument '{name}' at position {cursor_1}"))
        return False, cursor, ''

### Examples

In [None]:
get_next_argument(['a', 'b', 'c'], 'b', 1)

(True, 2, 'c')

In [None]:
get_next_argument(['a', 'b', 'c'], 'c', 2)

[SyntaxError]: End of arguments reached. Missing a value for argument 'c' at position 3


(False, 2, '')

In [None]:
get_next_argument(['a', 'b', 'c'], 'c', 2, suppress_error=True)

(False, 2, '')

## Type conversion

The input to Argument Parsing is just a string, so values have to be converted based on the information provided by the caller.  These function help to do that in a safe way.

In [None]:
# +export -internal
def to_integer(value:str) -> (bool, int, float):
    "Try converting a str to int.\nReturn success, the value, and possibly a float remainder."
    try:
        f_value = float(value)
        int_value = int(f_value)
        remainder = f_value - int_value
    except: return False, value, None
    return True, int_value, remainder

In [None]:
to_integer('-2.1'), to_integer('nice')

((True, -2, -0.10000000000000009), (False, 'nice', None))

In [None]:
# +export -internal
def to_float(value:str) -> (bool, float):
    "Try converting a str to float.\nReturn success, and the value."
    # TODO: check if 'inf', 'nan', ...?
    try   : return True , float(value)
    except: return False, value

In [None]:
to_float('-1e-3'), to_float('nan'), to_float('nice')

((True, -0.001), (True, nan), (False, 'nice'))

In [None]:
# +export -internal
def to_bool(value:str) -> (bool, bool):
    """Try converting a str to bool.
    'True' and 'False' are recognized, otherwise the value is cast to float, and then to bool.
    Return success, and the value."""
    if value == 'True' : return True, True
    if value == 'False': return True, False
    try   : return True , bool(float(value))
    except: return False, value

In [None]:
to_bool('1'), to_bool('0'), to_bool('True'), to_bool('False'), to_bool('abc')

((True, True), (True, False), (True, True), (True, False), (False, 'abc'))

In [None]:
# +export -internal
def to_unbounded_array(args:list, cursor:int) -> (bool, int, list):
    """Consume any number of values until either reaching the end of args,
    or until finding a value starting with '-', denoting the beginning of a new argument.
    Return success, the cursor, and the list of values.
    Currently this can't actually fail... don't use unbounded lists kids."""
    values = []
    while True:
        string_success, cursor, value = get_next_argument(args, None, cursor, suppress_error=True)
        if string_success:
            if value[0] != '-': values.append(value)
            else: # value starting with '-' means it's the next command
                cursor -= 1
                break
        else: break
    return True, cursor, values

In [None]:
to_unbounded_array(['-list', '1', '2', '-3'], 0)

(True, 2, ['1', '2'])

In [None]:
# +export -internal
def typify(type_or_value:object) -> (type, object):
    """Takes a type or a value.
    Returns a tuple of the type (or type of the value) and value (or None)"""
    return (type_or_value, None) if isinstance(type_or_value, type) else (type(type_or_value), type_or_value)

In [None]:
typify((int, int)*2)

(tuple, (int, int, int, int))

## Parsing

In [None]:
# +export
def parse_arguments(command:dict, args:[str]) -> (bool, dict, dict):
    "Finds, casts, and returns values from command, in the given comment."    
    # TODO: check that the type of all commands is supported ahead of time?
    # TODO: handle quoted arguments?
    # TODO: support command aliases?
    members = command.keys()
    result  = command.copy()
    is_set  = {member : False for member in members}
    state   = {'args': args, 'name': '', 'cursor': 0, 'inside_array': False,}
    success = True
    while state['cursor'] < len(args): # for arg in args:
        arg = args[state['cursor']]
        if arg[0] != '-':
            report_error(SyntaxError(f"Argument {state['cursor']} does not start with a '-'."))
            return False, result, is_set
        arg = arg[1:] # remove '-'
        state['name'] = arg # TODO: check that len(arg) > 0?
        
        for key in members: # loop over keys of command (the things we're supposed to find)
            if key != arg: continue    
            if is_set[key]:
                report_error(SyntaxError(f"Argument {state['cursor']} ('{arg}') was given multiple times."))
                success = False
            else:
                arg_type, arg_default = typify(command[key])
                member_success = handle_one_argument(result, state, arg_type, arg_default)
                if member_success: is_set[key] = True
                else: success = False
            break # once we have found the correct struct member, stop!
        else: # TODO: improve this msg. maybe: "is not part of the command"?
            report_error(SyntaxError(f"Argument {state['cursor']} ('{arg}') is not valid."))
            success = False
        if not success: break # stop at first error
        state['cursor'] += 1
        
    if success: success = check_is_set(result, is_set)
    return success, result, is_set

In [None]:
# +export -internal
def handle_one_argument(result:dict, state:dict, arg_type:type, arg_default:object) -> bool:
    "Parse the input args based on arg_type, and set arg_name in result to that value."
    # NOTE: 'state' and 'result' are references not values, and modified from here.
    args     = state['args']
    arg_name = state['name']
    success  = True
    if arg_type == str:
        # get the next argument, advance cursor, set success
        string_success, state['cursor'], value = get_next_argument(args, arg_name, state['cursor'])
        # TODO: how to handle strings that start with a '-'
        if string_success: result[arg_name] = value
        else: success = False

    elif arg_type == bool:
        if state['inside_array']:
            string_success, state['cursor'], value = get_next_argument(args, arg_name, state['cursor'])
            if string_success:
                bool_success, value = to_bool(value)
                if bool_success: result[arg_name] = value
                else:
                    report_error(ValueError(f"Value of argument {state['cursor']-1} ('{arg_name}') "\
                    f"was not convertable to bool. Please use 'True', 'False', '0', or '1'. (It was '{value}')"))
                    success = False
            else: success = False
        # special case where supplying the argument means True and not supplying it means use the default (False)
        else: result[arg_name] = True

    elif arg_type == int:
        # get the next argument, cast to int, check for remainder, advance cursor, set success
        string_success, state['cursor'], value = get_next_argument(args, arg_name, state['cursor'])
        if not string_success: return False
        int_success, value, remainder = to_integer(value)
        if int_success:
            result[arg_name] = value
            if remainder:
                report_warning("Junk on the end of the value for int argument "\
                              f"{state['cursor']-1} ('{arg_name}'): {remainder}")
        else:
            report_error(ValueError(f"Value of argument {state['cursor']-1} ('{arg_name}') "\
                                    f"was not an int. (It was '{value}')"))
            success = False

    elif arg_type == float:
        # get the next argument, cast to float, advance cursor, set success
        string_success, state['cursor'], value = get_next_argument(args, arg_name, state['cursor'])
        if not string_success: return False
        float_success, value = to_float(value)
        if float_success: result[arg_name] = value
        else:
            report_error(ValueError(f"Value of argument {state['cursor']-1} ('{arg_name}') "\
                                    f"was not a float. (It was '{value}')"))
            success = False

    elif arg_type == list or arg_type == tuple:
        if arg_default is None: # unbounded list / tuple
            if state['inside_array']:
                report_error(SyntaxError(f"Using an unbounded list or tuple inside an array is not supported."))
                return False
            array_success, state['cursor'], value = to_unbounded_array(args, state['cursor'])
            if array_success: # NOTE: currently this can't actually fail... don't use unbounded lists kids.
                result[arg_name] = arg_type(value)
            else: success = False
            
        else: # predefined list
            s = {'args': args, 'name': 'v', 'cursor': state['cursor'], 'inside_array': True}
            value = []
            for i, x in enumerate(arg_default):
                t, d = typify(x)
                n = f'{arg_name}[{i}]'
                s['name'] = n
                r = {n:d}
                member_success = handle_one_argument(r, s, t, d)
                if member_success: value.append(r[n])
                else: # TODO: Improve error message
                    # report_error(SyntaxError(f"Array argument {state['cursor']} ('{arg_name}') was not passed correctly."))
                    return False
            state['cursor'] = s['cursor']
            result[arg_name] = arg_type(value)

    else:
        report_error(TypeError(f"Argument {state['cursor']} ('{arg_name}') is of unsupported type {arg_type}."))
        success = False
        
    return success

In [None]:
# +export -internal
def check_is_set(result:dict, is_set:dict) -> bool:
    "Check if any required values (those without defaults), haven't been set yet"
    success = True
    for member, v_is_set in is_set.items():
        if v_is_set: continue
        arg_type, arg_default = typify(result[member])
        if arg_default is None: 
            if arg_type == bool: # NOTE: Special case, not setting a boolean means it's False.
                result[member] = False
                continue
            report_error(ValueError(f"Argument '{member}' has not been set, and no default value was given."))
            success = False
        elif (arg_type == list) or (arg_type == tuple): # this is a bounded list
            # generate list of names with python indexing syntax for better error reporting.
            name = [f'{member}[{i}]' for i in range(len(arg_default))]
            # create a new 'result' dict, mapping the 'idx names' to each of the values of the list.
            r = {n:x for n, x in zip(name, arg_default)}
            # since the entire list hasn't been set, each part of the list has also not been set.
            s = {n:False for n in r}
            # recurse, treating the members of the list as if they comprised a separate command.
            is_set_success = check_is_set(r, s)
            if is_set_success: # re-set result if all members of the list have a default value.
                result[member] = arg_type([r[n] for n in name])
                continue
            else: success = False
    return success

## Documentation

This argument parser is largely inspired by these two videos by Jonathan Blow.
>[Part 1](https://youtu.be/TwqXTf7VfZk)  
>[Part 2](https://youtu.be/pgiVrhsGkKY)

This module besically provides only one function:  
```python
def parse_arguments(command:dict, comment:str) -> (bool, dict, dict)
```  

It takes one __"command" dictionary__, and a __"comment" string__.  

#### __The command__

is a simple key-value collection of expected flags, where a attribute name maps to either a type, or a default value, from which the type is infered.  
```python
command = {
    'arg1':bool,
    'arg2':str,
    'arg3':32,
    'arg4':3.14,
}
```

#### __The comment__
is just a list of space-separated arguments, with words starting with a minus (`'-'`) denoting a keyword, and anything without a minus as the first character being a value to the previous keyword.  
```python
'-name bob -age 99 -celsius 30.5 -thirsty'
```  
is a valid string for the command  
```python
{
    'name'   : str,
    'weather': 'sunny',
    'celsius': float,
    'age'    : int,
    'thirsty': bool,
    'tired'  : bool
}
```

#### __The primitive types:__
Currently the following primitive types are supported:  
- `str`
    - a `str` argument requires one value.
    - e.g.: `-weather sunny`
- `bool`
    - a `bool` argument requires no values. setting the flag automatically sets the value to `True`.
    - writing `bool` is the same as using the default value `False`.
    - e.g.: `-is_wet`
- `int`
    - a `int` argument requires one value.
    - the value will first be cast to `float`, and then to `int`, partly due to how python works, and also to check for a remainder in case the provided value was actually in a float format.
    - e.g.: `-age 99`, `-negative -1`
- `float`
    - a `float` argument requires one value.
    - the value has to be castable to `float`. what is and what isn't a float can be suprising, so you should check the [casting rules](https://stackoverflow.com/a/20929983/) beforehand.
    - e.g.: `-pi 3.14`, `-negative -1.0`, `-weird nan`, `-large inf`, `-small -inf`
  
Any of these types can be declared either by just using the `type` directly, or by giving a default value of the specific `type`. All arguments that use the `type` directly have to be passed in the comment. If a default value is specified, or if the `type` is `bool`, the argument does not have to be passed in the comment, and instead the `result` will simply contain the default value. This changes with composite types (see below). If an argument was passed in the comment or not, can be seen by looking at the `is_set` return value (see below).

  
##### __The composite types__
`list` and `tuple` (referred to as 'array' when it can be either one of them) are also supported, however due to pythons lack of strong typing, they have slightly different semantics.  

Specifying only the type `list` or `tuple`, will result in an 'unbounded array' of that type, meaning that all values following the keyword will be added to the array, until either the end of arguments is reached, or a value starts with a minus (`'-'`), which denotes the start of the next argument. All values or the array will be of type `str`. This kind of argument should be used with caution, because, for instance, negative values will be treated as the start of a new argument.  
```python
{
    'unbounded_list' : list,
    'unbounded_tuple': tuple,
}
```  

The other, better way to use arrays is to actually create an array containing the types, default values, and ordering you want the values to have. This can get arbitrarily complex, mixing and matching any supported primitive type you want. The only thing not allowed, is using an unbounded array (see above).  
All values will be cast to the corresponding type using all the same semantics as of they were single values (see above). The only exception to that is the `bool` type, where the value has to be either `'True'`, `'False'`, or interpretable as a `float`, which will then be cast to a `bool`. This means that e.g. `'0.0'` will result in `False`, and `'123'` will result in `'True'` (careful, check the [casting rules](https://docs.python.org/3.3/library/stdtypes.html?highlight=frozenset#truth-value-testing) first).
```python
{
    'arg1': [int]*5,
    'arg2': (3.14, 'pi', bool),
    'arg3': (bool, str, 123)*2,
    'arg4': [[0]*3, [1]*3, [str]*3],
    'arg5': [str, int, bool, True, [1, '2', 3, bool], (2.1, float)]
}
```

#### __The return value__
is a three-tuple of `(success, result, is_set)`.  
- `success` is a `bool`, saying whether or not parsing was successful. If it is `False`, the other two arguments are not guaranteed to be valid. There will be an error message with details on what happened to help debugging.  
- `result` is a `dict` with exactly the same keys as the input `command`, with the corresponding values set to whatever was extracted from the comment. In cases where `success` if `False`, this might only be partially filled out, so `success` should always be checked.
- `is_set` is a `dict`, which also contains exactly the same keys as the input `command`, this time mapping to a `bool`. It is `True` if `comment` contains a value for the particular argument, and `False` otherwise. In cases where a default value is given in `command`, the same rule applies. Meaning that only if the default was overwritten by an argument in `comment` will the `is_set` value be `True`. This holds even for `bool`s, which default to `False` even if no explicit default was given.

## Examples

In [None]:
command = {
    'test'  : bool,
    'sunny' : False,
    'toast' : str,
    'shots' : int,
    'scale' : float,
    'scoops': [str, int, bool, [1, 2, 3, bool], (float, float)],
    # 'valid' : (bool, bool),
    'valid' : (1, 1.23, bool, 'hi', [1, 2, bool]),
    'nah'   : 'boi',
    'sweet' : bool,
    'nr'    : int,
    'list'  : list
}

comment = '-sunny -toast jelly -shots 25 -scale 69105.1234 -test -list 2 -scoops a 1 0 5 6 7 False 3.0 2.1 -nr 21'
# comment = '-sunny -toast jelly -shots 25 -scale 69105.1234 -test -nr 1'
parse_arguments(command, comment)

[SyntaxError]: Argument 0 ('') is not valid.


(False,
 {'test': bool,
  'sunny': False,
  'toast': str,
  'shots': int,
  'scale': float,
  'scoops': [str, int, bool, [1, 2, 3, bool], (float, float)],
  'valid': (1, 1.23, bool, 'hi', [1, 2, bool]),
  'nah': 'boi',
  'sweet': bool,
  'nr': int,
  'list': list},
 {'test': False,
  'sunny': False,
  'toast': False,
  'shots': False,
  'scale': False,
  'scoops': False,
  'valid': False,
  'nah': False,
  'sweet': False,
  'nr': False,
  'list': False})

```python
%timeit parse_arguments(command, comment)
>>> 44 µs ± 98.9 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
```

# Imports [Module]

In [None]:
# +default_exp -to imports -scoped -no_dunder_all

In [None]:
# +export
# This Flag allows anyone to know if this Module exists in their namespace
MODULE__IMPORTS__FLAG = None

In [None]:
# +export
import os,re,functools
import concurrent.futures
from threading import Thread
from queue import Queue
from configparser import ConfigParser
from pathlib import Path
from collections import OrderedDict
# from nbformat.sign import NotebookNotary
# from base64 import b64decode,b64encode
# from types import MethodType,FunctionType

## General

In [None]:
# +export
def test_eq(a,b): assert a==b, f'{a}, {b}'

## Config

In [None]:
# +export
def save_config_file(file, d, **kwargs):
    "Write settings dict to a new config file, or overwrite the existing one."
    config = ConfigParser(**kwargs)
    config['DEFAULT'] = d
    config.write(open(file, 'w'))

In [None]:
# +export
def read_config_file(file, **kwargs):
    config = ConfigParser(**kwargs)
    config.read(file)
    return config

In [None]:
# +export
_defaults = {"host": "github", "doc_host": "https://%(user)s.github.io", "doc_baseurl": "/%(lib_name)s/"}

def _add_new_defaults(cfg, file):
    "If an existing Config does not contain these values, add them, and save the config file.\n"\
    "This is meant to be used in case new default values are added in later versions."
    for k,v in _defaults.items():
        if cfg.get(k, None) is None:
            cfg[k] = v
            save_config_file(file, cfg)

In [None]:
# +export
@functools.lru_cache(maxsize=None)
class Config:
    "Store the basic information for nbdev to work"
    def __init__(self, cfg_name='settings.ini'):
        cfg_path = Path.cwd().absolute().resolve()
        while cfg_path != cfg_path.parent and not (cfg_path/cfg_name).exists(): cfg_path = cfg_path.parent
        self.config_path,self.config_file = cfg_path,cfg_path/cfg_name
        assert self.config_file.exists(), f"Could not find {cfg_name}"
        self.d = read_config_file(self.config_file)['DEFAULT']

    def __getattr__(self,k):
        if k.endswith('_path'): return self._path_to(k)
        try: return self.d[k]
        except KeyError: raise AttributeError(f"Config ({self.config_file.name}) has no attribute '{k}'") from None
    
    def _path_to(self,k,default=None):
        v = self.d.get(k, default)
        if v is None: raise AttributeError(f"Config ({self.config_file.name}) has no attribute '{k}'")
        return self.config_path/v
    
    def path_to(self,k,default=None):
        "Retrieve a path saved in Config relative to the folder the Config file is in."
        return self._path_to((k if k.endswith('_path') else k+'_path'), default)

    def get(self,k,default=None): return self.d.get(k, default)
    def __setitem__(self,k,v): self.d[k] = str(v)
    def __contains__(self,k):  return k in self.d
    def save(self): save_config_file(self.config_file,self.d)

In [None]:
# +export
def create_config(lib_name,
                  cfg_path='.', cfg_name='settings.ini',
                  license=None,
                  author=None, author_email=None, copyright=None,
                  maintainer=None, maintainer_email=None,
                  nbs_path='nbs',
                  lib_path='%(lib_name)s', doc_path='docs',
                  version='0.0.1', min_python='3.7',
                  language='English', status=None,
                  audience=None, title='%(lib_name)s',
                  description=None, keywords=None,
                  requirements=None, console_scripts=None, dep_links=None,
                  git_user=None, host='github', branch='master',
                  git_url='https://github.com/%(git_user)s/%(lib_name)s/tree/%(branch)s/',
                  doc_host='https://%(git_user)s.github.io',
                  doc_baseurl='/%(lib_name)s/',
                  bug_tracker_url='https://github.com/%(git_user)s/%(lib_name)s/issues',
                  repo_name=None, company_name=None,
                  **kwargs):
    if git_user is None: host = branch = git_url = doc_host = doc_baseurl = bug_tracker_url= None
    else: user = git_user # NOTE: backwards compatibility
    args = locals()
    path = args.pop('cfg_path')
    name = args.pop('cfg_name')
    kwargs = args.pop('kwargs') # NOTE: locals() also contains `kwargs` as a key, so remove it
    config = OrderedDict(filter(lambda x: x[1] is not None, # NOTE: Filter out None values
                                sorted({**args, **kwargs}.items(),
                                       key=lambda x:x[0]))) # NOTE: Sort by key
    save_config_file(Path(path)/name, config)

In [None]:
# +export -internal
create_config.__doc__ = """
Create a new config file and save it.

Parameters
----------
`lib_name` : str
    The name of the package that will be created

`cfg_path` : str, optional
    The path where to create the config file.
`cfg_name` : str, optional
    The name of the config file to be created.
    
`license` : {'apache2', }, optional
    The license under which this project is published.
`author` : str, optional
    The name of the author of this project (probably you).
`author_email` : str, optional
    The authors email address.
`copyright` : str, optional
    The authors name, or a company name.
`maintainer` : str, optional
    The person or group which maintains the project.
`maintainer_email` : str, optional
    The maintainers email address.

`nbs_path` : str, optional
    A path to a subdirectory relative to where the 'settings.ini' file is located.
    All of the notebooks that you want to have processed need to be in this folder,
    or in a subfolder.
`lib_path` : str, optional
    A path to a subdirectory relative to where the 'settings.ini' file is located.
    This is the folder where all generated .py files will be stored,
    and the name you use when importing the package.
`doc_path` : str, optional
    A path to a subdirectory relative to where the 'settings.ini' file is located.
    Present for compatibility with the original 'nbdev' project. This folder is
    where documentation generated from your notebooks in `nbs_path` is stored.

`version` : str, optional
    A version number in the '{major}.{minor}.{patch}' semantic versioning format.
`min_python` : {..., '3.6', 3.7', '3.8', '3.9', ...}, optional
    The minimum python version necessary to run your code [1].
`language` : {'English', ...}, optional
    The natural language used in your program [1].
`status` : {'1', '2', '3', '4', '5', '6', '7'}, optional
    The development status of your project [1].
    The numbers 1-7 correspond to the following status respectively:
    Planning, Pre-Alpha, Alpha, Beta, Production, Mature, Inactive
`audience` : {'Developers', 'End Users/Desktop', 'Other Audience', ...}, optional
    The intended audience of your project [1].
`title` : str, optional
    By default the same as your library name.
    Currently only used by the original nbdev project.
`description` : str, optional
    A short, one sentence, description of your project.
`keywords` : str, optional
    Space separated keywords / tags that describe your project.
    e.g. 'python jupyter notebook nbdev'.

`requirements` : str, optional
    Packages that are minimally required for your project to run.
    Written in the same format as setuptools requirements [2].
`console_scripts` : str, optional
    Space separated list of key=value pairs.
    The key is the name of the command,
    and value is the python module and function that is supposed to be called.
    Written in the same format as setuptools console-scripts [3].
`dep_links` : str, optional
    Currently not in use.
    Written in the same format as setuptools dependency links.

`git_user` : str, optional
    Your git username.
`host` : str, optional
    The name of your git repo host.
`branch` : str, optional
    The name of the main git branch.
`git_url` : str, optional
    The git URL where your project lives.

`doc_host` : str, optional
    The URL where you have documentation hosted.
`doc_baseurl` : str, optional
    The URL path relative to the `doc_host`,
    which points to where the docs for this project are stored.

`bug_tracker_url` : str, optional
    The URL where bugs and issues are tracked and discussed.

`repo_name` : str, optional
    For enterprise git users.
`company_name` : str, optional
    For enterprise git users.

Returns
-------
None

See Also
--------
[1] https://pypi.org/classifiers/
[2] https://packaging.python.org/discussions/install-requires-vs-requirements/
[3] https://python-packaging.readthedocs.io/en/latest/command-line-scripts.html
"""

```python
create_config('nbdev_rewrite', cfg_path='..',
              license='apache2',
              author='Florian Peters', copyright='Florian Peters',
              nbs_path='notebooks',
              status='2', audience='Developers',
              git_user='flpeters')
```

## Environment Information

In [None]:
# +export
def in_ipython():
    "Check if the code is running in the ipython environment (jupyter including)"
    program_name = os.path.basename(os.getenv('_', ''))
    if ('jupyter-notebook' in program_name or # jupyter-notebook
        'ipython'          in program_name or # ipython
        'JPY_PARENT_PID'   in os.environ):    # ipython-notebook
        return True
    else: return False
IN_IPYTHON = in_ipython()

In [None]:
# +export
def in_colab():
    "Check if the code is running in Google Colaboratory"
    try:
        from google import colab
        return True
    except: return False
IN_COLAB = in_colab()

In [None]:
# +export
def in_notebook():
    "Check if the code is running in a jupyter notebook"
    if in_colab(): return True
    try:
        shell = get_ipython().__class__.__name__
        if shell == 'ZMQInteractiveShell': return True   # Jupyter notebook, Spyder or qtconsole
        elif shell == 'TerminalInteractiveShell': return False  # Terminal running IPython
        else: return False  # Other type (?)
    except NameError: return False      # Probably standard Python interpreter
IN_NOTEBOOK = in_notebook()

## Multiprocessing

In [None]:
# +export
def num_cpus():
    "Get number of cpus"
    try:                   return len(os.sched_getaffinity(0)) # NOTE: not available on all platforms
    except AttributeError: return os.cpu_count()

In [None]:
# +export
class ProcessPoolExecutor(concurrent.futures.ProcessPoolExecutor):
    "Like `concurrent.futures.ProcessPoolExecutor` but handles 0 `max_workers`."
    def __init__(self, max_workers=None, on_exc=print, **kwargs):
        self.not_parallel = max_workers==0
        self.on_exc = on_exc
        if self.not_parallel: max_workers=1
        super().__init__(max_workers, **kwargs)

    def map(self, f, items, *args, **kwargs):
        g = functools.partial(f, *args, **kwargs)
        if self.not_parallel: return map(g, items)
        try: return super().map(g, items)
        except Exception as e: self.on_exc(e)

In [None]:
# +export
def parallel(f, items, *args, n_workers=None, **kwargs):
    "Applies `func` in parallel to `items`, using `n_workers`"
    if n_workers is None: n_workers = min(16, num_cpus())
    with ProcessPoolExecutor(n_workers) as ex:
        r = ex.map(f,items, *args, **kwargs)
        return list(r)

In [None]:
# +export
# https://github.com/justheuristic/prefetch_generator
class BackgroundGenerator(Thread):
    "Computes elements of a Generator in a background Thread."
    def __init__(self, generator, max_prefetch:int=-1):
        """
        `generator`: A Generator to wrap and prefetch from in a separate thread.
        `max_prefetch`: How many items to maximally prefetch at any given time.
        If `max_prefetch` is <= 0, then the queue size is infinite.
        """
        super().__init__()
        self.queue, self.generator, self.daemon = Queue(max_prefetch), generator, True
        self.start()
    
    def run(self):
        try:
            for item in self.generator: self.queue.put(item)
        except Exception as e:
            print('WARNING: Failed in BackgroundGenerator Thread!')
            raise e
        finally: self.queue.put(StopIteration)
    
    def __iter__(self): return self
    def __next__(self):
        next_item = self.queue.get()
        if next_item is StopIteration: raise StopIteration
        return next_item

In [None]:
# +export
def prefetch(max_prefetch:int=-1):
    """
    Decorator for wrapping a `yield`-ing Function with `BackgroundGenerator`,
    which computes elements of the generator in a background Thread.
    
    A new instance of `BackgroundGenerator` is created every time the decorated function is called.
    
    `max_prefetch`: How many items to maximally prefetch at any given time.
    If `max_prefetch` is <= 0, then the queue size is infinite.
    """
    def decorator(generator):
        def wrapper(*args,**kwargs):
            return BackgroundGenerator(generator(*args,**kwargs), max_prefetch=max_prefetch)
        functools.update_wrapper(wrapper, generator)
        return wrapper
    return decorator

## Regex

In [None]:
# +export
class ReLibName():
    "Regex expression that's compiled at first use but not before since it needs `Config().lib_name`"
    def __init__(self, pat, flags=0): self._re,self.pat,self.flags = None,pat,flags
    @property
    def re(self):
        if not hasattr(Config(), 'lib_name'): raise Exception("Please fill in the library name in settings.ini.")
        self.pat = self.pat.replace('LIB_NAME', Config().lib_name)
        if self._re is None: self._re = re.compile(self.pat, self.flags)
        return self._re

## Function Tools

In [None]:
# +export
def compose(*funcs, order=None):
    "Create a function that composes all functions in `funcs`, "\
    "passing along remaining `*args` and `**kwargs` to all"
    if len(funcs)==0: return noop
    if len(funcs)==1: return funcs[0]
    def _inner(x, *args, **kwargs):
        for f in funcs: x = f(x, *args, **kwargs)
        return x
    return _inner

In [None]:
# +export
def last_index(x, o):
    "Finds the last index of occurence of `x` in `o` (returns -1 if no occurence)"
    try: return next(i for i in reversed(range(len(o))) if o[i] == x)
    except StopIteration: return -1

# Main [Module]

In [None]:
# +default_exp -to main

In [None]:
# +export
# This Flag allows anyone to know if this Module exists in their namespace
MODULE__MAIN__FLAG = None

In [None]:
THIS_FILE = '00_export_v4.ipynb'

In [None]:
# +export
from collections import defaultdict
from inspect import signature, currentframe, getfullargspec
import nbformat
import ast
from ast import iter_fields, AST
import _ast

Only do these imports if executing as a python file.  
For bootstrapping purposes these modules are also contained in this notebook file.  
They are however exported to their own respective files, so when running as a python file, we need to import them back.

In [None]:
# +export
if (__name__ != '__main__') or ('MODULE__ARGUMENT_PARSING__FLAG' not in globals()):
    from nbdev_rewrite.argument_parsing import *
assert 'MODULE__ARGUMENT_PARSING__FLAG' in globals(), "Missing the 'argument_parsing' module."

if (__name__ != '__main__') or ('MODULE__IMPORTS__FLAG' not in globals()):
    from nbdev_rewrite.imports import *
assert 'MODULE__IMPORTS__FLAG' in globals(), "Missing the 'imports' module."

## Logging

### Print Options / Report Flags

In [None]:
# +export -internal
main_REPORT_OPTIONAL_ERROR:bool = False
main_REPORT_COMMAND_FOUND:bool = False
main_REPORT_RUN_STATISTICS:bool = True

In [None]:
# +export
def set_main_report_options(report_optional_error:bool=False,
                            report_command_found:bool=False,
                            report_run_statistics:bool=True):
    "Set options for how the Main Module will behave on encountering errors or warnings.\n"\
    "report_optional_error prints the information and then continues."
    global main_REPORT_OPTIONAL_ERROR, main_REPORT_COMMAND_FOUND, main_REPORT_RUN_STATISTICS
    main_REPORT_OPTIONAL_ERROR = report_optional_error
    main_REPORT_COMMAND_FOUND  = report_command_found
    main_REPORT_RUN_STATISTICS = report_run_statistics

### report_successful_export()

In [None]:
# +export -internal
def relative_path(file_path, relative_directory=Config().config_file.parent):
    return os.path.relpath(file_path, relative_directory).replace('\\', '/')

In [None]:
# +export
def report_successful_export(parsed_files, merged_files):
    "Report stats and compressed information about parsed and exported files."
    n_nbs = nr_of_notebooks_parsed = len(parsed_files['files'])
    n_py  = nr_of_output_py_files = len(merged_files)
    Title = f'{n_nbs} notebook{"s"*int(n_nbs!=1)} {"have" if n_nbs!=1 else "has"} been parsed, '\
            f'resulting in {n_py} python file{"s"*int(n_py!=1)}.\n\n'
    
    # Information about which notebooks export to which python files
    nb_info = f'The following {n_nbs} notebook{"s"*int(n_nbs!=1)} have been parsed:\n'
    nb_info += '-' * (len(nb_info) - 1)
    n_out = nr_of_files_outputting_code = 0
    for file in parsed_files['files']:
        nb_info += f"\n{file['relative_origin']} ({len(file['cells'])} cells total)\n"
        default = file['export_scopes'][(0,)]
        n_exp = len(file['export_scopes']) - int(default is None)
        nb_info += f'---> default:\t{None if (default is None) else relative_path(default["target"])}'
        for scope, target in sorted(file['export_scopes'].items(), key=lambda x: x[0]):
            if scope == (0,): continue
            nb_info += f"\n---> {scope}:\t{relative_path(target['target'])}"
        if n_exp > 0: n_out += 1
    
    
    Middle = f'Of the {n_nbs} notebook{"s"*int(n_nbs!=1)} parsed, '\
             f'{n_out} {"are" if n_out!=1 else "is"} outputting code.'
    
    # Information about how many Python files have been generated, and the number of cells exported to each
    py_info = f'The following {n_py} python file{"s"*int(n_py!=1)} {"have" if n_py!=1 else "has"} been generated:\n'
    py_info += '-' * (len(py_info) - 1) + '\n'
    for to, state in merged_files.items():
        n_cells = len(state['code'])
        py_info += f'---> {n_cells} cell{"s"*int(n_cells!=1)} output to {relative_path(to)}\n'
        
    print(f'{Title}{nb_info}\n\n{Middle}\n\n{py_info}')

### StackTrace with report_error() and report_optional_error()

This is a class for passing along contextual information during execution.  
The class is a linked list, which can be extended each time a new function is called.  
Everytime a function is called, create a new StackTrace instance, and pass the current instance to it.

In [None]:
# +export
class StackTrace: pass # only for :StackTrace annotations to work
class StackTrace:
    _up:StackTrace = None
    namespace:str = None
    lineno   :int = None
    _ext_file:dict = None
    
    def __init__(self, namespace:object=None, up:StackTrace=None):
        "`namespace` can be a function, a class, or None.\n"\
        "`up` is optional and can be another StackTrace instance."
        self.namespace = f'<{namespace.__qualname__}>()' if namespace else currentframe().f_back.f_code.co_name
        self._up, self.lineno, self._ext_file = up, currentframe().f_back.f_lineno, {}
    
    def ext(self, file:str=None, cellno:int=None, lineno:int=None, excerpt:str=None, span:(int, int)=None):
        "Set context information for reporting errors in external files e.g. notebooks."
        e = self._ext_file
        if not (file    is None) : e['file'   ] = file
        if not (cellno  is None) : e['cellno' ] = cellno
        if not (lineno  is None) : e['lineno' ] = lineno
        if not (excerpt is None) : e['excerpt'] = excerpt
        if not (span    is None) : e['span'   ] = span # TODO: convert a single int to tuple?
    
    def ext_clear_file   (self): self._ext_file.pop('file'   , None)
    def ext_clear_cellno (self): self._ext_file.pop('cellno' , None)
    def ext_clear_lineno (self): self._ext_file.pop('lineno' , None)
    def ext_clear_excerpt(self): self._ext_file.pop('excerpt', None)
    def ext_clear_span   (self): self._ext_file.pop('span'   , None)
    
    def to_list(self):
        "Creates list of the entire StackTrace (most recent last)."
        if self._up: return [*self._up.to_list(), self]
        else: return [self]
        
    def _reduce_ext(self):
        "Combines all `StackTrace._ext_file` dicts into one, prefering more recest settings over old ones."
        ext = [s._ext_file for s in self.to_list() if s._ext_file]
        e = {}
        for d in ext: e.update(d)
        return e
        
    def up(self, up:StackTrace):
        "Set this StackTraces `_up` reference and return `self`. Useful for chaining references."
        self._up=up
        return self
    
    def __repr__(self): return f"{__name__}.StackTrace(namespace={self.namespace},line={self.lineno})"
    
    def _repr(self):
        "Recursively create a string of all StackTraces for printing error messages."
        return f"{'' if self._up is None else self._up._repr()}"\
               f"<{__name__}>, line {self.lineno} in {self.namespace}\n"
    
    def _repr_ext(self, file:str=None, cellno:int=None, lineno:int=None, excerpt:str=None, span:(int, int)=None):
        "Create a string from the `StackTrace._ext_file` dict for printing error messages."
        s = f"<{file}>, cell {cellno}, line {lineno}\n"
        if excerpt:
            x = f"--->{' ' if ((lineno is None) or (0 <= lineno <= 9)) else ''}{lineno} "
            s += f"{x}{excerpt}\n"\
                 f"{(' ' * (len(x) + span[0]) + '^' * span[1]) if span else ''}\n"
        return s
    
    def report_error(self, err:Exception,
                     file:str=None, cellno:int=None, lineno:int=None, excerpt:str=None, span:(int, int)=None,
                     success=False, _ln_of_callsite=True) -> bool:
        "Report the Error `err`.\nOther args are used for setting `_ext_file` and are optional.\n"\
        "Returns whatever is passes as `success`."
        if _ln_of_callsite: self.lineno = currentframe().f_back.f_lineno
        err_type = err.__class__.__name__
        s = f"{'-'*75}\n"\
            f"{err_type}{' '*(41-len(err_type))}Stacktrace (most recent call last)\n"\
            f"{self._repr()}\n"
        self.ext(file, cellno, lineno, excerpt, span) # TODO: should this maybe be passed to _reduce_ext?
        ext:dict = self._reduce_ext()
        if ext: s += f"{self._repr_ext(**ext)}\n" # TODO: check for len(ext) > 0 and values not None?
        s += f"[{err_type}]: {err}"
        print(s) # NOTE: This is what prints the error message.
        return success
    
    def report_caught_syntax_error(self, err:SyntaxError, msg='invalid syntax', success=False):
        "Report an error taking advantage of common formatting when handling a python SyntaxError."
        self.lineno = currentframe().f_back.f_lineno
        return self.report_error(SyntaxError(msg),
                                 excerpt=err.text[:-1],
                                 lineno=err.lineno,
                                 span=(err.offset-1, 1),
                                 success=success,
                                 _ln_of_callsite=False)
    
    def report_optional_error(self, err:Exception,
                        file:str=None, cellno:int=None, lineno:int=None, excerpt:str=None, span:(int, int)=None):
        "Report the error if the global variable `main_REPORT_OPTIONAL_ERROR` is set."
        if main_REPORT_OPTIONAL_ERROR:
            self.lineno = currentframe().f_back.f_lineno
            self.report_error(err=err, _ln_of_callsite=False,
                              file=file,
                              cellno=cellno, lineno=lineno,
                              excerpt=excerpt, span=span)

In [None]:
# +export
def Traced(f):
    "The Annotated function will have a StackTrace instance passed to it as the `st` keyword-argument.\n"\
    "That instance represents the annotated function, with a reference to the calling site."
    spec = getfullargspec(f)
    assert ('st' in spec.args) or ('st' in spec.kwonlyargs), "Traced functions have to take a 'st' argument."
    if 'st' in spec.annotations:
        assert spec.annotations['st'] == StackTrace, "A traced functions 'st' argument is reserved for "\
                                                     "a StackTrace. Other annotations are not allowed."
    else: f.__annotations__['st'] = StackTrace # This modifies the original function. Is that acceptable?
    
    _st = StackTrace(f)
    def _wrapper(*args, st:StackTrace=None, **kwargs):
        if not st:
            st = StackTrace(None)
            st.namespace = currentframe().f_back.f_code.co_name
        elif (st is _st): return f(*args, st=st, **kwargs) # prevent self referencing due to e.g. recursion.
        st.lineno = currentframe().f_back.f_lineno
        # NOTE: clearing _st._ext_file to an empty dict like this is actually faster than not clearing it...
        res = f(*args, st=_st.up(st), **kwargs)
        _st._ext_file = {}
        return res
    
    functools.update_wrapper(_wrapper, f)
    return _wrapper

#### Example Code

In [None]:
@Traced
def _do_the_thing(st):
    success = True
    if True:# Error has happened!
        return st.report_error(Exception('Failed doing the thing!'))
    return success

In [None]:
@Traced
def _start(st):
    success = True
    success = _do_the_thing(st=st)
    return success

In [None]:
_start()

---------------------------------------------------------------------------
Exception                                Stacktrace (most recent call last)
<__main__>, line 1 in <module>
<__main__>, line 4 in <_start>()
<__main__>, line 5 in <_do_the_thing>()

[Exception]: Failed doing the thing!


False

In [None]:
@Traced
def _recursive_stuff(i, st):
    print(i, st, st._up)
    if i <= 0: return 1
    else: return i + _recursive_stuff(i - 1, st=st)

In [None]:
_recursive_stuff(3)

3 __main__.StackTrace(namespace=<_recursive_stuff>(),line=12) __main__.StackTrace(namespace=<module>,line=1)
2 __main__.StackTrace(namespace=<_recursive_stuff>(),line=12) __main__.StackTrace(namespace=<module>,line=1)
1 __main__.StackTrace(namespace=<_recursive_stuff>(),line=12) __main__.StackTrace(namespace=<module>,line=1)
0 __main__.StackTrace(namespace=<_recursive_stuff>(),line=12) __main__.StackTrace(namespace=<module>,line=1)


7

In [None]:
_st=StackTrace(namespace=_start)
_st.ext(file='file.py', lineno=45, excerpt='# weird comment')
_st=StackTrace(_do_the_thing, up=_st)
_st.ext(cellno=18, lineno=33)
_st.ext(excerpt = '# this overwrites the weird comment')
_st.report_error(SyntaxError('Failed to parse'), span=(2, 7))

---------------------------------------------------------------------------
SyntaxError                              Stacktrace (most recent call last)
<__main__>, line 1 in <_start>()
<__main__>, line 6 in <_do_the_thing>()

<file.py>, cell 18, line 33
--->33 # this overwrites the weird comment
         ^^^^^^^

[SyntaxError]: Failed to parse


False

In [None]:
a = StackTrace(None)

In [None]:
a.ext(file='test_file.py'); a._ext_file

{'file': 'test_file.py'}

In [None]:
b = StackTrace((lambda:'some function'), up=a)

In [None]:
b.ext(lineno=2); b._ext_file

{'lineno': 2}

In [None]:
b.report_error(Exception('OH NO! Something went wrong!'))

---------------------------------------------------------------------------
Exception                                Stacktrace (most recent call last)
<__main__>, line 1 in <module>
<__main__>, line 1 in <<lambda>>()

<test_file.py>, cell None, line 2

[Exception]: OH NO! Something went wrong!


False

In [None]:
b._up._ext_file

{'file': 'test_file.py'}

In [None]:
b._ext_file

{'lineno': 2}

In [None]:
b.to_list()

[__main__.StackTrace(namespace=<module>,line=1),
 __main__.StackTrace(namespace=<<lambda>>(),line=1)]

## Find and Parse Comments

### Finding comments in source code

In [None]:
# +export
# TODO: Only look for 0 indent comments?
def iter_comments(src:str, pure_comments_only:bool=True, line_limit:int=None) -> (str, (int, int)):
    "Detect all comments in a piece of code, excluding those that are a part of a string."
    in_lstr = in_sstr = False
    count, quote = 1, ''
    for i, line in enumerate(src.splitlines()[:line_limit]):
        is_pure, escape, prev_c = True, False, '\n'
        for j, c in enumerate(line):
            # we can't break as soon as not is_pure, because we have to detect if a multiline string beginns
            if is_pure and (not (c.isspace() or c == '#')): is_pure = False
            if (in_sstr or in_lstr):
                # assert in_sstr ^ in_lstr # XOR
                if escape: count = 0
                else:
                    if (c == quote):
                        count = ((count + 1) if (c == prev_c) else 1)
                        if in_sstr: in_sstr = False
                        elif (in_lstr and (count == 3)): count, in_lstr = 0, False
                escape = False if escape else (c == '\\')
            else:                    
                if (c == '#'):
                    if (pure_comments_only and is_pure): yield (line, (i, j))
                    elif (not pure_comments_only):       yield (line[j:], (i, j))
                    break
                elif c == "'" or c == '"':
                    count = ((count + 1) if (c == prev_c) else 1)
                    if count == 1: in_sstr = True
                    elif count == 3: count, in_lstr = 0, True
                    else: assert False, 'If this code path happens, then the code keeping track of quotes is broken.'
                    quote = c
            prev_c = c

In [None]:
list(iter_comments('# this is a zero indented comment'))

[('# this is a zero indented comment', (0, 0))]

### Parsing

This regex is used to remove whitespace and the '#' of python comments.  
The content of the comment will be added to a group, which can be extracted afterwards.

In [None]:
# +export -internal
# https://docs.python.org/3/library/re.html
re_match_comment = re.compile(r"""
        ^              # start of the string
        \s?            # 0 or 1 whitespace
        \#+\s?         # 1 or more literal "#", then 0 or 1 whitespace
        (.*)           # group of arbitrary symbols (except new line)
        $              # end of the string
        """,re.IGNORECASE | re.VERBOSE) # re.MULTILINE is not passed, since this regex is used on each line separately.

In [None]:
re_match_comment.search('# hi')

<re.Match object; span=(0, 4), match='# hi'>

In [None]:
re_match_comment.search('a\n# hi')

In [None]:
re_match_comment.search('# # hi').groups()

('# hi',)

This specifies what a valid nbdev comment has to look like, and filters out everything whose syntax does not fit with any of the registered commands.

In [None]:
# +export
@Traced
def parse_comment(all_commands:dict, comment:str, st:StackTrace) -> (bool, str, dict, dict):
    "Finds command names and arguments in comments and parses them with parse_arguments()"
    res = re_match_comment.search(comment)
    if not res:
        st.report_optional_error(SyntaxError('Not a valid comment syntax.'))
        return False, None, None, None
    
    all_args = res.groups()[0].split()
    if len(all_args) == 0:
        st.report_optional_error(SyntaxError(f"Need at least one argument in comment. Reveived: '{comment}'"))
        return False, None, None, None
    
    cmd, *args = all_args
    if cmd[0] != '+':
        st.report_optional_error(SyntaxError("The first argument (the command to execute) does not start with a '+'."\
                                            f"It was: '{cmd}'"), span=(1, 3))
        return False, None, None, None
    
    cmd = cmd[1:] # remove the '+'
    if cmd not in all_commands:
        st.report_optional_error(KeyError(f"'{cmd}' is not a recognized command. See 'all_commands'."))
        return False, None, None, None
    
    success, result, is_set = parse_arguments(all_commands[cmd], args)
    if not success: return False, None, None, None
    
    return True, cmd, result, is_set

### Examples

In [None]:
kw_default_exp = {'scope': 'file' , 'to': str}
kw_export      = {'internal': bool, 'to': ''}

all_commands   = {'default_exp': kw_default_exp, 'export': kw_export}

In [None]:
parse_arguments(all_commands['export'], '-internal -to file.py')

[SyntaxError]: Argument 0 ('') is not valid.


(False, {'internal': bool, 'to': ''}, {'internal': False, 'to': False})

In [None]:
parse_comment(all_commands, '# +export -internal -to file.py', st=StackTrace(None))

(True,
 'export',
 {'internal': True, 'to': 'file.py'},
 {'internal': True, 'to': True})

## Convert Cell from String

In [None]:
# +export
@Traced
def from_string_cell(source:str, st:StackTrace) -> (bool, str):
    "Take a cell containing a single string and return the content of that string."
    try: tree = ast.parse(source).body
    except SyntaxError as e: return st.report_caught_syntax_error(e), None
    if len(tree) == 1:
        node = tree[0]
        if isinstance(node, _ast.Expr):
            if isinstance(node.value, _ast.Str):
                code = node.value.s.strip()
                try: ast.parse(code)
                except SyntaxError as e:
                    return st.report_caught_syntax_error(e, msg="The code in the 'from_string' "\
                                                         "cell is invalid python syntax."), None
                return True, code
#             elif isinstance(node.value, _ast.JoinedStr):
#                 return st.report_error(SyntaxError("'f'-strings are not allowed.")), None
            else: return st.report_error(SyntaxError(f"Expected cell to contain a single '_ast.Str' expression, "\
                                                     f"but got {type(node.value)}")), None
        else: return st.report_error(SyntaxError(f"Expected cell to contain a single expression '_ast.Expr', "\
                                                 f"but got {type(node)}")), None
    else: return st.report_error(SyntaxError('Cell contains more than one Expression. '\
                                             'Expected cell to contain exactly one String.')), None

### Examples

In [None]:
examples = ['r"""\nsetup(**setup_call)\n""";',
            '"""\nsetup(**setup_call)\n""";',
            '"""\nsetup(**setup_call)\n"""',
            '"""setup(**setup_call)"""',
            '"setup(**setup_call)"',
            'r"setup(**setup_call)"',
            'r"setup(**setup_call)";',
            '"print()"\\\n"print()"',
            '"""print(\'a\')"""\n"""print(\'b\')"""',
            '1+1\n"print(2)"',
            '"print(2)"\n1+1',
            "f'print(1)'",
            'test=123',
            "f'''\nprint{test}\n''';",
            '1+1',
            '!what is this???'
         ]

In [None]:
from_string_cell(examples[0])

(True, 'setup(**setup_call)')

In [None]:
from_string_cell(examples[7])

---------------------------------------------------------------------------
SyntaxError                              Stacktrace (most recent call last)
<__main__>, line 1 in <module>
<__main__>, line 14 in <from_string_cell>()

<None>, cell None, line 1
---> 1 print()print()
                  ^

[SyntaxError]: The code in the 'from_string' cell is invalid python syntax.


(False, None)

In [None]:
from_string_cell(examples[8])
print('\n--------------')
print(examples[8])

---------------------------------------------------------------------------
SyntaxError                              Stacktrace (most recent call last)
<__main__>, line 1 in <module>
<__main__>, line 23 in <from_string_cell>()

[SyntaxError]: Cell contains more than one Expression. Expected cell to contain exactly one String.

--------------
"""print('a')"""
"""print('b')"""


In [None]:
from_string_cell(examples[12])
print('\n--------------')
print(examples[12])

---------------------------------------------------------------------------
SyntaxError                              Stacktrace (most recent call last)
<__main__>, line 1 in <module>
<__main__>, line 21 in <from_string_cell>()

[SyntaxError]: Expected cell to contain a single expression '_ast.Expr', but got <class '_ast.Assign'>

--------------
test=123


In [None]:
from_string_cell(examples[13])
print('\n--------------')
print(examples[13])

---------------------------------------------------------------------------
SyntaxError                              Stacktrace (most recent call last)
<__main__>, line 1 in <module>
<__main__>, line 19 in <from_string_cell>()

[SyntaxError]: Expected cell to contain a single '_ast.Str' expression, but got <class '_ast.JoinedStr'>

--------------
f'''
print{test}
''';


In [None]:
from_string_cell(examples[15])
print('\n--------------')
print(examples[15])

---------------------------------------------------------------------------
SyntaxError                              Stacktrace (most recent call last)
<__main__>, line 1 in <module>
<__main__>, line 6 in <from_string_cell>()

<None>, cell None, line 1
---> 1 !what is this???
       ^

[SyntaxError]: invalid syntax

--------------
!what is this???


## Find function, class and variable Names in Source Code

https://docs.python.org/3/library/ast.html

This code is using pythons builtin `ast` module to parse source code into an abstract syntax tree, from which the set of all variable-, function-, and classnames is extracted.  
All names found, that are not private (prefixed with a single underscore), are added to a set to get rid of duplicate names.  
It also seperately parses the nbdev-reserved special variable name `_all_` and adds all assignments to it to the set.  

Some special cases (like fastai specific python extensions) are also handled here, although this will probably change in the future.

### debug help

In [None]:
# +export -internal
def lineno(node):
    "Format a string containing location information on ast nodes. Used for Debugging only."
    lineno     = getattr(node, 'lineno', None)
    col_offset = getattr(node, 'col_offset', None)
    return lineno, col_offset

### Parsing

In [None]:
# +export -internal
def unwrap_attr(node:_ast.Attribute) -> str:
    "Joins a sequance of Attribute accesses together in a single string. e.g. numpy.array"
    if isinstance(node.value, _ast.Attribute): return '.'.join((unwrap_attr(node.value), node.attr))
    else: return '.'.join((node.value.id, node.attr))

In [None]:
# +export -internal
def unwrap_assign(node, names):
    "inplace, recursive update of list of names"
    if   isinstance(node, _ast.Name)      : names.append(node.id)
    elif isinstance(node, _ast.Starred)   : names.append(node.value.id)
    elif isinstance(node, _ast.Attribute) : names.append(unwrap_attr(node))
    elif isinstance(node, _ast.Subscript) : pass # e.g. a[0] = 1
    elif isinstance(node, (_ast.List, _ast.Tuple)):
        for x in node.elts: unwrap_assign(x, names)
    elif isinstance(node, list):
        for x in node: unwrap_assign(x, names)
    else: raise SyntaxError(f'Can\'t resolve {node} to name, unknown type.')

In [None]:
# +export -internal
def not_private(name): return not (name.startswith('_') and (not name.startswith('__')))

In [None]:
# +export -internal
def resolve_decorator_name(node):
    if   isinstance(node, _ast.Name): return node.id
    elif isinstance(node, _ast.Call):
        if   isinstance(node.func, _ast.Name     ): return node.func.id
        elif isinstance(node.func, _ast.Attribute): return unwrap_attr(node.func)
    elif isinstance(node, _ast.Attribute): return unwrap_attr(node)
    raise SyntaxError(f'Can\'t resolve decorator {node} to name, unknown type.')

def decorators(node): yield from (resolve_decorator_name(d) for d in node.decorator_list)

In [None]:
# +export -internal
def update_from_all_(node, names):
    "inplace, recursive update of set of names, by parsing the right side of a _all_ variable"
    if   isinstance(node, _ast.Str): names.add(node.s)
    elif isinstance(node, _ast.Name): names.add(node.id)
    elif isinstance(node, _ast.Attribute): names.add(unwrap_attr(node))
    elif isinstance(node, (_ast.List, _ast.Tuple, _ast.Set)):
        for x in node.elts: update_from_all_(x, names)
    elif isinstance(node, _ast.Subscript) :
        raise SyntaxError(f'Subscript expression not allowed in _all_.')
    elif isinstance(node, _ast.Starred):
        raise SyntaxError(f'Starred expression *{node.value.id} not allowed in _all_.')
    else: raise SyntaxError(f'Can\'t resolve {node} to name, unknown type.')

In [None]:
# +set -to_dunder_all setup MODULE_MAIN_THING 

These two functions are a mess, and hacky way to handle the fastai specific `@patch` decorator.
```python
def fastai_patch(cls, node, names):
    if   isinstance(cls, _ast.Name):
        if not_private(cls.id): names.add(f'{cls.id}.{node.name}')
    elif isinstance(cls, (_ast.List, _ast.Tuple, _ast.Set)):
            for x in cls.elts: fastai_patch(x, node, names)
    else: raise SyntaxError(f'Can\'t resolve {cls} to @patch annotation, unknown type.')

def handle_fastai_specific_logic(node, names):
    if 'patch' in decorators(node):
        if not (len(node.args.args) >= 1):
            raise SyntaxError(f'fastai\'s @patch decorator requires at least one parameter.')
        cls = node.args.args[0].annotation
        if cls is None:
            raise SyntaxError(f'fastai\'s @patch decorator requires a type annotation on the first parameter.')
        fastai_patch(cls, node, names)
        return False
    return True
```

In [None]:
# +export
@Traced
def find_names(code:str, st:StackTrace) -> (bool, set):
    "Find all function, class and variable names in the given source code."
    try: tree = ast.parse(code).body
    except SyntaxError as e: return st.report_caught_syntax_error(e), None
    names = set()
    for node in tree:
        if isinstance(node, (_ast.FunctionDef, _ast.ClassDef )):
            if not_private(node.name): names.add(node.name)
        else:
            is_assign, is_ann_assign = isinstance(node, _ast.Assign), isinstance(node, _ast.AnnAssign)
            if is_assign or is_ann_assign:
                tmp_names = list()
                if   is_assign:     unwrap_assign(node.targets, tmp_names)
                elif is_ann_assign: unwrap_assign(node.target , tmp_names)
                for name in tmp_names:
                    if not_private(name): names.add(name)
                    # NOTE: special reserved var names can only use private variable names
                    elif name == '_all_': # NOTE: _all_ is a keyword reserved by nbdev.
                        if len(tmp_names) != 1:
                            raise SyntaxError(f'Reserved keyword "_all_" can only be used in simple assignments.')
                        update_from_all_(node.value, names)
    return True, names

### Examples

In [None]:
code = """
def abc():
    pass
    
b = abc()
a, *ayy_ = range(100)
_all_ = [a, v, 'g', _i]
"""

In [None]:
find_names(code)

(True, {'_i', 'a', 'abc', 'ayy_', 'b', 'g', 'v'})

In [None]:
find_names('x = 1')

(True, {'x'})

## Relativify import statements in output file

This part is responsible for transforming import statements.  
It only affects 'from' imports of the library the project belongs to.  
So if the project library is called "my_library", then `from my_library import *` might be transformed into `from . import *` in the output file.  
The relative path is generated in such a way that it will be a valid import from the file the code is exported to.

The "normal" `import module` statement does not allow relative module names, so it can not be translated from an absolute version in the notebook to a relative one in the output file.  
Similarly, using a relative module name in the notebook in a `from .module import ...` statement does not work due to the interactive nature of the notebook environment.  
Those two cases are not supported for automatic translation since they would require a very hacky solution, which can not be guaranteed to be always correct.

In [None]:
# +export -internal
def make_import_relative(p_from:Path, m_to:str)->str:
    "Convert a module `m_to` to a name relative to `p_from`."
    mods = m_to.split('.')
    splits = str(p_from).split(os.path.sep)
    if mods[0] not in splits: return m_to
    i=len(splits)-1
    while i>0 and splits[i] != mods[0]: i-=1
    splits = splits[i:]
    while len(mods)>0 and splits[0] == mods[0]: splits,mods = splits[1:],mods[1:]
    return '.' * len(splits) + '.'.join(mods)

In [None]:
n1, n2, n3, n4, n5 = 'nbdev.core', 'nbdev.core', 'nbdev.vision.transform', 'nbdev.notebook.core', 'nbdev.vision'
p1, p2, p3 = Path('./nbdev/data.py').absolute(), Path('./nbdev/vision/data.py'), Path('./nbdev/vision/data.py')
p4, p5     = Path('./nbdev/data/external.py'), Path('./nbdev/vision/learner.py')

In [None]:
test_eq(make_import_relative(p1, n1),'.core')
test_eq(make_import_relative(p2, n2),'..core')
test_eq(make_import_relative(p3, n3),'.transform')
test_eq(make_import_relative(p4, n4),'..notebook.core')
test_eq(make_import_relative(p5, n5),'.')

```python
%%timeit
>>> 8.49 µs ± 23.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
```

In [None]:
# +export -internal
# https://docs.python.org/3/library/re.html
letter = 'a-zA-Z'
identifier = f'[{letter}_][{letter}0-9_]*'
re_import = ReLibName(fr"""
    ^                             # start of the string / line
    (\ *)                         # any amount of whitespace (indenting)
    from(\ +)                     # 'from', followed by at least one whitespace
    (LIB_NAME(?:\.{identifier})*) # Name of the library, possibly followed by dot separated submodules
    \ +import(.+)                 # whitespace, then 'import', followed by arbitrary symbols except new line
    $                             # end of the string / line
    """, re.VERBOSE | re.MULTILINE)

In [None]:
# +export
def relativify_imports(origin:Path, code:str)->str:
    "Transform an absolute 'from LIB_NAME import module' into a relative import of 'module' wrt the library."
    def repl(match):
        sp1,sp2,module,names = match.groups()
        return f'{sp1}from{sp2}{make_import_relative(origin, m_to=module)} import{names}'
    return re_import.re.sub(repl,code)

In [None]:
print(relativify_imports(Path('./nbdev_rewrite/submodule/data.py'),"""
import numpy as np, matplotlib.pyplot, moduleaaaabbb as mod
import nbdev_rewrite.vision
# Nothing to see here
from   nbdev_rewrite.abc   import array as arr, linalg.solve, module as mod
def function():
    "from nbdev_rewrite import *"
    pass
from     nbdev_rewrite import (abs, b as c, h,) # sure
from nbdev_rewrite import *
from nbdev_rewrite.core import* # ok
    from . import *
from nbdev_rewrite  import(
    abs
                  as a
    , # this is weird, but legal
                       absolute 
    as 
                  f
                  )"""))


import numpy as np, matplotlib.pyplot, moduleaaaabbb as mod
import nbdev_rewrite.vision
# Nothing to see here
from   ..abc import array as arr, linalg.solve, module as mod
def function():
    "from nbdev_rewrite import *"
    pass
from     .. import (abs, b as c, h,) # sure
from .. import *
from ..core import* # ok
    from . import *
from .. import(
    abs
                  as a
    , # this is weird, but legal
                       absolute 
    as 
                  f
                  )


## Project Initialization

In [None]:
# +export
def init_lib():
    "initialize the module folder, if it's not initialized already"
    C = Config()
    if (not C.lib_path.exists()) or (not (C.lib_path/'__init__.py').exists()):
        C.lib_path.mkdir(parents=True, exist_ok=True)
        with (C.lib_path/'__init__.py').open('w') as f:
            f.write(f'__version__ = "{C.version}"\n')
    else: pass # module *should* already exists
init_lib()

## Export Path Parsing

### identify modules

Here we use pattern matching to identify valid module names.

https://docs.python.org/2.0/ref/identifiers.html  
```
identifier:     (letter|"_") (letter|digit|"_")*
letter:         lowercase | uppercase
lowercase:      "a"..."z"
uppercase:      "A"..."Z"
digit:          "0"..."9"
```

https://docs.python.org/2.0/ref/import.html  
```
import_stmt:    "import" module ["as" name] ("," module ["as" name] )* 
              | "from" module "import" identifier ["as" name]
                ("," identifier ["as" name] )*
              | "from" module "import" "*" 
module:         (identifier ".")* identifier
```

In [None]:
# +export -internal
# https://docs.python.org/3/library/re.html
letter = 'a-zA-Z'
identifier = f'[{letter}_][{letter}0-9_]*'
module = fr'(?:{identifier}\.)*{identifier}'
module

'(?:[a-zA-Z_][a-zA-Z0-9_]*\\.)*[a-zA-Z_][a-zA-Z0-9_]*'

In [None]:
# +export -internal
# https://docs.python.org/3/library/re.html
re_match_module = re.compile(fr"""
        ^              # start of the string
        {module}       # definition for matching a module 
        $              # end of the string
        """, re.VERBOSE)

In [None]:
re_match_module.search('module.main.test')

<re.Match object; span=(0, 16), match='module.main.test'>

In [None]:
# +export
@Traced
def module_to_path(m:str, st:StackTrace)->(bool, Path):
    "Turn a module name into a path such that the exported file can be imported from the library "\
    "using the same expression."
    if re_match_module.search(m) is not None:
        if m.endswith('.py'):
            return st.report_error(ValueError(f"The module name '{m}' is not valid, because ending on '.py' "\
                                f"would produce a file called 'py.py' in the folder '{m.split('.')[-2]}', "\
                                 "which is most likely not what was intended.\nTo name a file 'py.py', use the "\
                                 "'-to_path' argument instead of '-to'.")), None
        return True, Config().path_to('lib')/f"{os.path.sep.join(m.split('.'))}.py"
    else: return st.report_error(ValueError(f"'{m}' is not a valid module name.")), None

#### Examples

In [None]:
module_to_path('module.sub.file')

(True,
 WindowsPath('//DESKTOP-MDPTPCT/Projects/GitHub/nbdev_rewrite/nbdev_rewrite/module/sub/file.py'))

In [None]:
module_to_path('lalala :) lalala')

---------------------------------------------------------------------------
ValueError                               Stacktrace (most recent call last)
<__main__>, line 1 in <module>
<__main__>, line 13 in <module_to_path>()

[ValueError]: 'lalala :) lalala' is not a valid module name.


(False, None)

These functions might come in handy late on

```python
import importlib.util
# importlib.util._resolve_name
# importlib.util.resolve_name
importlib.util.resolve_name('..export', 'module.test')
>>> 'module.export'
```

### identify paths

When the user explicitly passes a path, then this code is tasked with checking it for correctness and converting it to an absolute path from the perspective of the library path.

In [None]:
# +export -internal
def commonpath(*paths)->Path:
    "Given a sequence of path names, returns the longest common sub-path."
    return Path(os.path.commonpath(paths))

In [None]:
commonpath(Path('c:/abc/fgh/a'), Path('c:/abc/fgh/b'))

WindowsPath('c:/abc/fgh')

In [None]:
# +export -internal
def in_directory(p:Path, d:Path)->bool:
    "Tests if `p` is pointing to something in the directory `d`.\n"\
    "Expects both `p` and `d` to be fully resolved and absolute paths."
    return p.as_posix().startswith(d.as_posix())

In [None]:
def in_directory_slow_1(p, d)->bool:
    try: p.relative_to(d)
    except: return False
    else: return True
def in_directory_slow_2(p, d)->bool:
    return len(commonpath(p, d).parts) >= len(d.parts)

In [None]:
in_directory(p=Path('C:/abc/fgh/abc.txt'), d=Path('C:/abc/fgh/'))

True

In [None]:
# +export
@Traced
def make_valid_path(s:str, st:StackTrace)->(bool, Path):
    "Turn a export path argument into a valid path, resolving relative paths and checking for mistakes."
    config = Config()
    p, lib, proj = Path(s), config.path_to('lib'), config.config_file.parent
    is_abs = p.is_absolute()
    p = (p if is_abs else (lib/p)).absolute().resolve()
    if (not is_abs) and (not in_directory(p, proj)):
        return st.report_error(ValueError("Relative export path beyond top level directory of project "\
                                          "is not allowed by default. Use an absolute path, "\
                                          f"or set <NOT IMPLEMENTED YET> flag on the command. ('{s}')")), None
    if not p.suffix:
        return st.report_error(ValueError(f"The path '{s}' is missing a file type suffix like '.py'.")), None
    if p.suffix == '.py': return True, p
    else: return st.report_error(ValueError(f"Expected '.py' file ending, but got '{p.suffix}'. ('{s}')")), None

#### Examples

In [None]:
make_valid_path(Path('./module/../hi.py'))

(True,
 WindowsPath('//DESKTOP-MDPTPCT/Projects/GitHub/nbdev_rewrite/nbdev_rewrite/hi.py'))

In [None]:
make_valid_path('main.py')
make_valid_path('./main.py')
make_valid_path('../../nbdev_rewrite/nbdev_rewrite/main.py')
make_valid_path('d:/main.py')
make_valid_path('main/main.py')
make_valid_path('../nbdev_rewrite/main.py')

(True,
 WindowsPath('//DESKTOP-MDPTPCT/Projects/GitHub/nbdev_rewrite/nbdev_rewrite/main.py'))

In [None]:
make_valid_path('test.pyy')

---------------------------------------------------------------------------
ValueError                               Stacktrace (most recent call last)
<__main__>, line 1 in <module>
<__main__>, line 16 in <make_valid_path>()

[ValueError]: Expected '.py' file ending, but got '.pyy'. ('test.pyy')


(False, None)

In [None]:
make_valid_path('../test.py')

(True, WindowsPath('//DESKTOP-MDPTPCT/Projects/GitHub/nbdev_rewrite/test.py'))

In [None]:
make_valid_path('../../test.py')

---------------------------------------------------------------------------
ValueError                               Stacktrace (most recent call last)
<__main__>, line 1 in <module>
<__main__>, line 10 in <make_valid_path>()

[ValueError]: Relative export path beyond top level directory of project is not allowed by default. Use an absolute path, or set <NOT IMPLEMENTED YET> flag on the command. ('../../test.py')


(False, None)

## Main

### Register Commands

`@register_command` stores argument information about the registered function in the global variables `all_commands`, and a reference to the function in `cmd2func`.

In [None]:
# +export
def register_command(cmd, args, active=True):
    "Store mapping from command name to args, and command name to reference to the decorated function in globals."
    if not active: return lambda f: f
    all_commands[cmd] = args
    def _reg(f):
        cmd2func[cmd] = f
        return f
    return _reg

In [None]:
# +export
all_commands = {}
cmd2func     = {}

Be careful with the order of the decorators!!!  
Since `@register_command` stores a reference to the function, and `@traced` modifies the function, they are not commutative!

In [None]:
# +export
@register_command(cmd='default_exp', # allow custom scope name that can be referenced in export?
                  args={'to': '', 'to_path': '', 'no_dunder_all': False, 'scoped': False})
@Traced
def kw_default_exp(file_info, cell_info, result, is_set, st:StackTrace) -> bool:
    "Set the default file that cells of this notebook will be exported to."
    success:bool = True
    if not (is_set['to'] ^ is_set['to_path']): # NOTE: XOR
        return st.report_error(ValueError("The `default_exp` command expects exactly one of the arguments "\
                               f"'-to' or '-to_path' to be set, but recieved was: {result}"))
    # NOTE: use this cells indentation level, or the default tuple([0]) as key to identify scope
    scope:tuple     = cell_info['scope'] if result['scoped'] else tuple([0])
    old_target:Path = file_info['export_scopes'].get(scope, None)
    conv_success, new_target = (module_to_path(result['to'], st=st)
                                if is_set['to'] else
                                make_valid_path(result['to_path'], st=st))
    if not conv_success: return False
    if old_target is not None:
        if old_target['target'] != new_target:
            return st.report_error(ValueError(f"Overwriting an existing export target is not allowed."\
                            f"\n\twas (cell {old_target['cell_info']['cell_nr']}): '{old_target['target']}'"\
                            f"\n\tnew (cell {cell_info['cell_nr']}): '{new_target}'"))
        else: pass # TODO: issue a warning in this case
    file_info['export_scopes'][scope] = {
        'target' : new_target,
        'add_dunder_all' : (not result['no_dunder_all']),
        'cell_info' : cell_info,
    }
    return success

In [None]:
assert kw_default_exp == cmd2func['default_exp'], 'are the decorators in the right order?'

In [None]:
# +export
@register_command(cmd='export',
                  args={'internal': False, 'to': '', 'to_path':'', 'ignore_scope':False, 'from_string':False})
@Traced
def kw_export(file_info, cell_info, result, is_set, st:StackTrace) -> bool:
    "This cell will be exported from the notebook to a .py file."
    success:bool = True
    if (is_set['to'] and is_set['to_path']):
        return st.report_error(ValueError("The `export` command does not accept the '-to' and '-to_path' "\
                               f"argument at the same time. They are mutually exclusive. Received: {result}"))
    cell_info['export_to_py'] = True # Using this command implicitly means to export this cell
    if result['from_string']:
        # TODO: unwrap cell content
        # from_string_cell(cell_info['original_source_code'])
        pass
    is_internal = cell_info['is_internal'] = result['internal']
    if is_internal: pass # no contained names will be added to __all__ for importing
    else: success, cell_info['names'] = find_names(cell_info['original_source_code'])
    conv_success, export_target = True, None
    if is_set['to'     ]: conv_success, export_target = module_to_path (result['to'], st=st)
    if is_set['to_path']: conv_success, export_target = make_valid_path(result['to_path'], st=st)
    if not conv_success: return False
    if export_target is not None:
        if is_set['ignore_scope']:
            return st.report_error(ValueError("Setting 'ignore_scope' is not allowed when "\
                                   f"exporting to a custom target using 'to' or 'to_path'."))
        cell_info['export_to'].append(export_target) # Set a new export target just for this cell.
    else:
        if result['ignore_scope']: cell_info['export_to_default'] += 1
        else:                      cell_info['export_to_scope']   += 1
    return success

In [None]:
assert kw_export == cmd2func['export'], 'are the decorators in the right order?'

In [None]:
@register_command(cmd='set',
                  args={'file': '', 'use_names': True},
                  active=False)
@Traced
def kw_set(file_info, cell_info, result, is_set, st:StackTrace) -> bool:
    "set some predefined variables that control execution behaviour"
    success:bool = True
    return success

#### Documentation

Command: `default_exp`  
Set the default file that cells of this notebook will be exported to.  
Args:
- `to`: The target file written in a python module form. 
- `to_path`: The target file as a relative or absolute path.
- `scoped`: Flag for setting the export target only for the scope that the command has been invoked from. Scopes are implicitly set by markdown cells with different levels of headings.
- `no_dunder_all` : The target file will not have a `__all__` defined.

Command: `export`  
This cell will be exported from the notebook to a .py file.  
Args:  
- `internal`: The variable, function and class names of this cell will not be added to `__all__` in the exported file, making them hidden from any `import *`.
- `to`: Instead of exporting to the notebook or scope wide default file, this cell is exported to the file specified in this argument. File is written in python module form.
- `to_path`: The same as `to`, but this argument is written as a path.
- `ignore_scope`: This cell ignores any export targets set for the scope it resides in, and instead always uses the default for the entire notebook. This argument is incompatible with `to` and `to_path`.

Command: `set`  
Set some predefined variables that control execution behaviour.  
Args:  
- `file`: If this is set, the variables will only be set on this specific file.
- `use_names`: Control whether or not a `__all__` with all (non internal) variable, function and class names should be inserted at the top of the file. Default is `True`.

### ToDo

- Improve `init_lib()`
- Generalize the `relative_path()` function currently under error reporting
- Maybe add reverse mapping from  commands to cells that use those commands
- Add better debugging information.
- Think about how a change to a .py file could be re-ported into the notebook.
- fix settings propagation to imported modules, e.g. Argument Parsing print options don't work properly when they are a second hand import.
- Only keep cells that contain commands / ones that are supposed to be exported and throw away the rest as soon as possible. This could become important if notebooks contain lots of large images or other data.
    - Keeping all the cells is done in anticipation of handing over control to a meta program, which might still need those cells we deem unnecessary, and because it is easier to modify the program if the original state of all cells is available at every step, only with additional state being added.
- Should `find_names()` be executed at the very end, during exporting, after it has been decided if it is actually necessary? Even if it's not `internal`, its export scope might have `no_dunder_all` set.
- in name parsing `find_names()`:
    - improve error messages
    - replace `SyntaxError` with something like `CompilerError` in `find_names()`, to signal to the user that it's not their fault, but a missing case in this program.
    - remove fastai specific code from the `find_names()` code, and instead implement that logic in a meta program
- in commands:
    - Add aliases to commands / arguments?
    - add a `names_only` flag to the `export` command or a command to directly add names to `__all__`, similar to the current `_all_` hack?
    - Add `auto` flag to `default_exp` to automatically determine export path and file name from notebook directory and name.
    - Add a `file_documentation` command, for writing a doc string for an entire file, or for an entire module?
- in file writing:
    - Parallelize file writing
    - initialize a python package, if it doesn't already exists
    - Support Automatic / Explicit Versioning
- global settings:
    - seperation amount (vertical whitespace) between cells
    - adding callbacks?
    - overwriting parsing functions?
    - recursively search for files?
    - overwrite config paths?
    - Most of these should probably NOT be allowed, because (a) it would be a nightmare to reason about, (b) if file 1 sets the setting one way and file 2 sets it a different way, how do we deal with that?, (c) there already is a way to do some of these settings, namely in the config. Global settings should be done in the config.

### Load

In [None]:
# +export
_reserved_dirs = (Config().lib_path, Config().doc_path)
def crawl_directory(path:Path, recurse:bool=True) -> list:
    "Crawl the `path` directory for a list of .ipynb files."
    # TODO: Handle symlinks?
    if isinstance(path, (list, tuple, set)):
        for p in path: yield from crawl_directory(p, recurse)
    else:
        if path.is_file(): yield path
        else:
            for p in path.iterdir():
                fn = p.name
                if fn.startswith('.') or fn.startswith('_'): continue
                if p.is_file():
                    if fn.endswith('.ipynb'): yield p
                    else: continue
                elif p.is_dir() and recurse:
                    if p in _reserved_dirs: continue
                    else: yield from crawl_directory(p, recurse)
                else: continue

In [None]:
# +export
def read_nb(fname:Path) -> dict:
    "Read the `fname` notebook."
    with open(Path(fname),'r', encoding='utf8') as f: return dict(nbformat.reads(f.read(), as_version=4))

In [None]:
# +export
@prefetch(max_prefetch=-1) # NOTE: max_prefetch <= 0 means the queue size is infinite
def async_load_notebooks(path:Path=Config().nbs_path, recurse:bool=True) -> (Path, dict):
    "Crawl for notebooks in the `path` directory, and load in a background thread."
    for file_path in crawl_directory(path, recurse): yield (file_path, read_nb(file_path))

#### Examples

In [None]:
list(crawl_directory(Config().nbs_path))

[WindowsPath('//DESKTOP-MDPTPCT/Projects/GitHub/nbdev_rewrite/notebooks/00_export_v4.ipynb'),
 WindowsPath('//DESKTOP-MDPTPCT/Projects/GitHub/nbdev_rewrite/notebooks/99_index.ipynb')]

In [None]:
nb = read_nb(THIS_FILE)
len(nb['cells'])

302

In [None]:
# [len(x[1]['cells']) for x in async_load_notebooks()]
#>[90, 2, 100, 174, 31, 2]

### Parsing

In [None]:
# +export -internal
# https://docs.python.org/3/library/re.html
re_match_heading = re.compile(r"""
        ^              # start of the string
        (\#+)\s+       # 1 or more literal "#", then 1 or more whitespace
        (.*)           # group of arbitrary symbols (including new line)
        $              # end of the string
        """,re.IGNORECASE | re.VERBOSE | re.DOTALL)

In [None]:
res = re_match_heading.search('## test')
res.groups()

('##', 'test')

In [None]:
# +export
class DictLikeAccess():
    __slots__ = []
    def __getitem__(self, key):        return getattr(self, key)
    def __setitem__(self, key, value): return setattr(self, key, value)

In [None]:
# +export
class DictLikeRepr():
    __slots__ = []
    def __repr__(self):
        s = self.__class__.__name__ + ' {\n'
        for key in self.__slots__:
            s += f'\t{key} : {getattr(self, key, None).__repr__()},\n'
        return s+'}'

In [None]:
class ExportUnit(DictLikeAccess, DictLikeRepr):
    __slots__ = ('cell_nr', 'scope', 'is_internal', 'names',
                 'source_code', 'export_to',
                 'export_to_scope', 'export_to_default')
    def __init__(self, cell_nr, scope, source_code):
        self.cell_nr           = cell_nr
        self.scope             = scope
        self.source_code       = source_code
        self.is_internal       = None
        self.names             = None
        self.export_to         = list()
        self.export_to_scope   = 0
        self.export_to_default = 0

In [None]:
t = ExportUnit(0, (0,), 'x=1')

In [None]:
t['cell_nr']

0

In [None]:
t.scope

(0,)

In [None]:
t

ExportUnit {
	cell_nr : 0,
	scope : (0,),
	is_internal : None,
	names : None,
	source_code : 'x=1',
	export_to : [],
	export_to_scope : 0,
	export_to_default : 0,
}

In [None]:
# +export
class FileInfo(DictLikeAccess, DictLikeRepr):
    __slots__ = ('origin_file', 'relative_origin', 'nb_version',
                 'export_scopes', 'cells', 'export_units')
    def __init__(self, origin_file, nb_version,
                 export_scopes=None, cells=None, export_units=None):
        self.origin_file     = origin_file
        self.relative_origin = os.path.relpath(origin_file, Config().config_file.parent).replace('\\', '/')
        self.nb_version      = nb_version
        self.export_scopes   = {(0,): None} if (export_scopes is None) else export_scopes
        self.cells           = list()       if (cells         is None) else cells
        self.export_units    = list()       if (export_units  is None) else export_units

In [None]:
p = Path(THIS_FILE).absolute().resolve()

In [None]:
t = FileInfo(p, (3, 7))

In [None]:
t['cells']

[]

In [None]:
t['origin_file'] = '1'

In [None]:
t['origin_file']

'1'

In [None]:
t2 = FileInfo(p, (3, 7))

In [None]:
t.cells.append(1)

In [None]:
t.cells

[1]

In [None]:
t2.cells

[]

In [None]:
t

FileInfo {
	origin_file : '1',
	relative_origin : 'notebooks/00_export_v4.ipynb',
	nb_version : (3, 7),
	export_scopes : {(0,): None},
	cells : [1],
	export_units : [],
}

In [None]:
t2

FileInfo {
	origin_file : WindowsPath('//DESKTOP-MDPTPCT/Projects/GitHub/nbdev_rewrite/notebooks/00_export_v4.ipynb'),
	relative_origin : 'notebooks/00_export_v4.ipynb',
	nb_version : (3, 7),
	export_scopes : {(0,): None},
	cells : [],
	export_units : [],
}

In [None]:
# +export
@Traced
def parse_file(file_path:Path, file:dict, st:StackTrace) -> (bool, dict):
    success = True
    pure_comments_only = True
    nb_version:(int, int) = (file['nbformat'], file['nbformat_minor'])
    metadata  :dict       =  file['metadata']
    
    file_info = FileInfo(origin_file = file_path,
                         nb_version  = nb_version)
#     file_info = {
#         'origin_file': file_path,
#         'relative_origin': os.path.relpath(file_path, Config().config_file.parent).replace('\\', '/'),
#         'nb_version': nb_version,
#         'export_scopes': {
#             (0,): None, # NOTE: (0,) maps to the default target for an entire file.
#         },
#         'cells': list(),
#         'export_units' : list(),
#     }
    scope_count :[int] = [0]
    scope_level :int   = 0
    
    cells:list = file_info['cells']
    
    st.ext(file=file_info['relative_origin'])
    
    for i, cell in enumerate(file['cells']):
        cell_type   = cell['cell_type']
        cell_source = cell['source']
        cell_info = {
            'cell_nr' : i,
            'cell_type' : cell_type,
            'original_source_code' : cell_source,
            'processed_source_code': cell_source,
            'scope' : tuple(scope_count),
            'export_to_py' : False,
            'export_to_scope' : 0,
            'export_to_default' : 0,
            'is_internal' : None,
            'export_to' : [],
            'names' : None,
            'comments' : []
        }
        if cell_type == 'code':
            st.ext(cellno = i)
            comments_to_remove = []
            for comment, (lineno, charno) in iter_comments(cell_source, pure_comments_only, line_limit=None):
                st.ext(lineno = lineno + 1) # zero counting offset
                st.ext(excerpt = comment)
                parsing_success, cmd, result, is_set = parse_comment(all_commands,comment,st=st)
                if not parsing_success: continue
                # TODO: cound nr of found cells
                if main_REPORT_COMMAND_FOUND:
                    print(f'Found: {cmd} @ ({i}, {lineno}, {charno}) with args: {result}')
                if cmd in cmd2func:
                    cmd_success = cmd2func[cmd](file_info, cell_info, result, is_set, st=st)
                    if not cmd_success: return False, file_info # TODO: Stop at first error or continue?
                else: raise ValueError(f"The command '{cmd}' in cell number {i} is recognized, "\
                                        "but is missing a corresponding action mapping in cmd2func.")
                cell_info['comments'].append(comment)
                comments_to_remove.append((lineno, charno))
            if len(comments_to_remove) > 0:
                lines = cell_source.splitlines()
                if pure_comments_only:
                    for lineno, charno in comments_to_remove[::-1]: lines.pop(lineno)
                else:
                    for lineno, charno in comments_to_remove[::-1]: lines[lineno] = lines[lineno][:charno]
                cell_info['processed_source_code'] = '\n'.join(lines)
            
        elif cell_type == 'markdown':
            res = re_match_heading.search(cell_source)
            if not (res is None): # this cell contains a heading
                heading_level, heading_name = res.groups()
                new_scope_level = len(heading_level) # number of '#' in the heading
                if new_scope_level > scope_level:
                    scope_count += ([0] * (new_scope_level - (len(scope_count)))) # extend list if necessary
                elif new_scope_level < scope_level:
                    scope_count = scope_count[:new_scope_level] # reset lower values
                scope_count[new_scope_level - 1] += 1
                scope_level = new_scope_level
            else: pass # this cell is regular markdown
        elif cell_type == 'raw': pass
        else: raise ValueError(f"Unknown cell_type '{cell_type}' in cell number {i}."\
                                "Should be 'code', 'markdown', or 'raw'.")
        cells.append(cell_info)
    return success, file_info

In [None]:
# +export
@Traced
def parse_all(file_generator, st:StackTrace) -> (bool, dict):
    "Loads all .ipynb files in the origin_path directory, and passes them one at a time to parse_file."
    success:bool = True
    parsed_files = {
        # Add flags and settings variables above this line
        'files': list()
    }
    # TODO: use multithreading / multiprocessing per file / per n cells
    for file_path, file in file_generator:
        # if file_path.name != THIS_FILE: continue # For Debugging
        parse_success, file = parse_file(file_path, file, st=st)
        if not parse_success:
            success = False # TODO: Stop at first error or continue?
        # TODO: before returning, give any meta programm a chance to run.
        # maybe have parse_file return some additional information about any meta programm
        parsed_files['files'].append(file)
        
    return success, parsed_files

### Merging

In [None]:
# +export
@Traced
def merge_all(parsed_files:dict, st:StackTrace) -> (bool, dict):
    success:bool = True
    config    = Config()
    lib_path  = config.lib_path
    nbs_path  = config.nbs_path
    proj_path = config.config_file.parent
    zero_tuple = (0,)
    
    # NOTE: This will contain all the merges files
    export_files = defaultdict(lambda: {'names': set(), 'code': [], 'orig': None, 'add_dunder_all':None})
    
    for file_info in parsed_files['files']:
        rel_orig:str = file_info['relative_origin']
        st.ext(file=rel_orig)
        st.ext_clear_cellno() # NOTE: Clear cellno, because this is a new file.
        scopes:dict  = file_info['export_scopes']
        assert zero_tuple in scopes, 'No default in export Scopes.'
        scopes_available:bool = (len(scopes) > 1)
        default_scope   :dict = scopes[zero_tuple]
        # NOTE: Having no default is ok, as long as all cells still have a valid export target
        none_default    :bool = (default_scope is None)
        default_export  :Path = None if none_default else default_scope['target']
            
        if not none_default:
            # NOTE: Set this notebooks default as the origin of one of the `export_files`.
            default_state:dict = export_files[default_export]
            if (default_state['orig'] is None): default_state['orig'] = rel_orig
            else: return st.report_error(
                ValueError(f'Multiple files have {default_export} as the default export target. '\
                           f'(old: {default_state["orig"]} | new: {rel_orig})')), None
                
        for cell in file_info['cells']:
            # NOTE: At this point, the `file_info` still contains all of the original cells of the notebook
            if not cell['export_to_py']: continue
            st.ext(cellno=cell["cell_nr"])
            info_string = f"# {'Internal ' if cell['is_internal'] else ''}Cell nr. {cell['cell_nr']}"
            info_string_src = (info_string + f"; Comes from '{rel_orig}'")
            
            # NOTE: Handle a cell directly specifying its export target
            if len(cell['export_to']) > 0:
                for to in cell['export_to']:
                    state:dict = export_files[to]
                    if not cell['is_internal']: state['names'].update(cell['names'])
                    state['code'].append(f"{info_string_src}\n{relativify_imports(to, cell['processed_source_code'])}")
            
            # NOTE: Handle a cell belonging to a scope and find the best match
            if scopes_available:
                if cell['export_to_scope'] > 0:
                    # Do scope matching
                    cell_scope:tuple = cell['scope']
                    best_fit = zero_tuple
                    best_fit_len = 0
                    # NOTE: The number of scopes should usually be relatively small, so this should be fine.
                    for k in scopes.keys():
                        if ((len(k) > best_fit_len) # Trying to find the tightest fit
                            and (k == cell_scope[:len(k)])): # iff cell is part of this scope
                            best_fit, best_fit_len = k, len(k)
                    to:Path = scopes[best_fit]['target']
                    if (best_fit == zero_tuple) or (to == default_export):
                        cell['export_to_default'] += cell['export_to_scope']
                        cell['export_to_scope'] = 0
                        pass
                    else:
                        state:dict = export_files[to]
                        if not cell['is_internal']: state['names'].update(cell['names'])
                        for _ in range(cell['export_to_scope']):
                            state['code'].append(f"{info_string_src}\n{relativify_imports(to, cell['processed_source_code'])}")
            else:
                cell['export_to_default'] += cell['export_to_scope']
                cell['export_to_scope'] = 0
            
            # NOTE: Handle a cell ignoring all scopes, or being in the default scope.
            if cell['export_to_default'] > 0:
                if none_default:
                    return st.report_error(ValueError(f'Cell does not have a export target. '\
                                     'Did you forget to add a default target using `default_exp`?')), None
                to:Path = default_export
                state:dict = export_files[to]
                if not cell['is_internal']: state['names'].update(cell['names'])
                for _ in range(cell['export_to_default']):
                    state['code'].append(f"{info_string}\n{relativify_imports(to, cell['processed_source_code'])}")
        # NOTE: Set 'add_dunder_all' and check for mismatches
        for k, v in scopes.items(): # for all scopes that this files exports to
            if v is None: continue
            state = export_files[v['target']]
            if   state['add_dunder_all'] is None: # it defaults to None, see top of the function
                 state['add_dunder_all'] = v['add_dunder_all']
            elif state['add_dunder_all'] == v['add_dunder_all']:
                continue
            else:
                # TODO: To improve this error message further, information about which previous files / cells
                # affected the same export state are necessary
                return st.report_error(ValueError('Multiple `default_exp` commands which specify the same target '\
                                        'cannot have different values for the `no_dunder_all` argument.\n'\
                                       f"The value defined in cell nr {v['cell_info']['cell_nr']} in '{rel_orig}' "\
                                       f'does not match with a previous definition.')), None
        # NOTE: The files can't yet be written, because there might be other notebooks exporting to the same files.
        # NOTE: This is the end of the "for each file" loop
    return success, export_files

### Writing

In [None]:
def stringify_names(names:set, sep='\n\n\n')->str:
    start, part = "__all__ = [", ''
    for name in sorted(names):
        if len(part) + len(name) < 90:
            part = f"{part}'{name}', "
        else:
            start += (part + '\n')
            part = f"           '{name}', "
    return f'{sep}{start}{part[:-2]}]'

In [None]:
def stringify_names_2(names):
    return f"\n\n\n__all__ = ['{', '.join(sorted(names))}']"

```python
test_data = ['abc'] * 1000
```

```python
%timeit stringify_names(test_data)
>>> 242 µs ± 489 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
```

```python
%timeit stringify_names_2(test_data)
>>> 18 µs ± 146 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
```

In [None]:
# +export
@Traced
def write_file(to:Path, state:dict, st:StackTrace) -> bool:
    success:bool = True
    orig:str  = state['orig']
    names:set = state['names']
    code:list = state['code']
    add_dunder_all:bool = state['add_dunder_all']
    sep:str = '\n\n\n'
    if orig is None:
        warning = f'# AUTOGENERATED! DO NOT EDIT! View info comment on each cell for file to edit.'
    else:
        warning = f'# AUTOGENERATED! DO NOT EDIT! File to edit: {orig} (unless otherwise specified).'
    if add_dunder_all:
        if len(names) > 0:
            # TODO: add line breaks at regular intervals
            comma = "', '"
            dunder_all = f"{sep}__all__ = ['{comma.join(sorted(names))}']"
        else: dunder_all = f'{sep}__all__ = []'
    else: dunder_all = ''
    code :str = sep + sep.join(code)
    file_content:str = f'{warning}{dunder_all}{code}'
    to.parent.mkdir(parents=True, exist_ok=True)
    try:
        with open(to, 'w', encoding='utf8') as f: f.write(file_content)
    except Exception as e: return st.report_error(e)
    return success

In [None]:
# +export
@Traced
def write_all(merged_files:dict, st:StackTrace) -> bool:
    # print(dict(export_files))
    success:bool = True
    for to, state in merged_files.items():
        write_success = write_file(to=to, state=state, st=st)
        if not write_success: return False
    return success

### Main()

In [None]:
# +export
@Traced
def main(nbs_path:str=None, lib_path:str=None, recurse:bool=True, st:StackTrace=None) -> (bool, dict, dict):
    "Load, Parse, Merge, and Write .ipynb files to .py files."
    success:bool = True
    config = Config()
    proj_path:Path = config.config_file.parent
    nbs_path:Path  = config.nbs_path if (nbs_path is None) else Path(nbs_path).absolute().resolve()
    lib_path:Path  = config.lib_path if (lib_path is None) else Path(lib_path).absolute().resolve()
    
    # NOTE: LOAD
    notebooks = async_load_notebooks(path=nbs_path, recurse=recurse)
    
    # NOTE: PARSE
    parse_success, parsed_files = parse_all(notebooks, st=st)
    if not parse_success:
        st.report_error(Exception('At least one Error has occured during parsing. '\
                                  'No files on disk have been modified. Exiting.'))
        return False, parsed_files, None
    
    # NOTE: MERGE
    merge_success, merged_files = merge_all(parsed_files, st=st)
    if not merge_success:
        st.report_error(Exception('At least one Error occured during merging of files to be exported. '\
                                  'No files on disk have been modified. Exiting.'))
        return False, parsed_files, merged_files
    
    # NOTE: WRITE
    write_success = write_all(merged_files, st=st)
    if not write_success:
        st.report_error(Exception('At least one Error occured during writing the parsed and merged files to disk. '\
                                  'Some files might have been written to disk and others might not. Exiting.'))
        return False, parsed_files, merged_files
    
    if main_REPORT_RUN_STATISTICS:
        report_successful_export(parsed_files, merged_files)
    # NOTE: RETURN
    return success, parsed_files, merged_files

## Run

In [None]:
# +export
set_arg_parse_report_options(report_error=False)
set_main_report_options(report_optional_error=False,
                        report_command_found=False,
                        report_run_statistics=True)

In [None]:
success, parsed_files, merged_files = main();

2 notebooks have been parsed, resulting in 4 python files.

The following 2 notebooks have been parsed:
-------------------------------------------
notebooks/00_export_v4.ipynb (302 cells total)
---> default:	nbdev_rewrite/main.py
---> (1,):	setup.py
---> (2,):	nbdev_rewrite/argument_parsing.py
---> (3,):	nbdev_rewrite/imports.py
notebooks/99_index.ipynb (23 cells total)
---> default:	None

Of the 2 notebooks parsed, 1 is outputting code.

The following 4 python files have been generated:
-------------------------------------------------
---> 48 cells output to nbdev_rewrite/main.py
---> 25 cells output to setup.py
---> 14 cells output to nbdev_rewrite/argument_parsing.py
---> 20 cells output to nbdev_rewrite/imports.py



In [None]:
from nbdev_rewrite.main import *

In [None]:
success, parsed_files, merged_files = main();

2 notebooks have been parsed, resulting in 4 python files.

The following 2 notebooks have been parsed:
-------------------------------------------
notebooks/00_export_v4.ipynb (302 cells total)
---> default:	nbdev_rewrite/main.py
---> (1,):	setup.py
---> (2,):	nbdev_rewrite/argument_parsing.py
---> (3,):	nbdev_rewrite/imports.py
notebooks/99_index.ipynb (23 cells total)
---> default:	None

Of the 2 notebooks parsed, 1 is outputting code.

The following 4 python files have been generated:
-------------------------------------------------
---> 48 cells output to nbdev_rewrite/main.py
---> 25 cells output to setup.py
---> 14 cells output to nbdev_rewrite/argument_parsing.py
---> 20 cells output to nbdev_rewrite/imports.py



In [None]:
parsed_files['files'][0]['export_scopes']

{(0,): {'target': WindowsPath('//DESKTOP-MDPTPCT/Projects/GitHub/nbdev_rewrite/nbdev_rewrite/main.py'),
  'add_dunder_all': True,
  'cell_info': {'cell_nr': 103,
   'cell_type': 'code',
   'original_source_code': '# +default_exp -to main',
   'processed_source_code': '',
   'scope': (4,),
   'export_to_py': False,
   'export_to_scope': 0,
   'export_to_default': 0,
   'is_internal': None,
   'export_to': [],
   'names': None,
   'comments': ['# +default_exp -to main']}},
 (1,): {'target': WindowsPath('//DESKTOP-MDPTPCT/Projects/GitHub/nbdev_rewrite/setup.py'),
  'add_dunder_all': False,
  'cell_info': {'cell_nr': 1,
   'cell_type': 'code',
   'original_source_code': '# +default_exp -to_path ../setup.py -scoped -no_dunder_all',
   'processed_source_code': '',
   'scope': (1,),
   'export_to_py': False,
   'export_to_scope': 0,
   'export_to_default': 0,
   'is_internal': None,
   'export_to': [],
   'names': None,
   'comments': ['# +default_exp -to_path ../setup.py -scoped -no_dunder_a

In [None]:
[c for c in parsed_files['files'][0]['cells'] if c['export_to_py']];

## Develop new Stuff

In [None]:
Config().lib_path == Config().path_to('lib_path') == Config().path_to('lib')

True

In [None]:
lib = Config().path_to('lib'); lib

WindowsPath('//DESKTOP-MDPTPCT/Projects/GitHub/nbdev_rewrite/nbdev_rewrite')

### regex for matching import statements

https://docs.python.org/3.0/reference/simple_stmts.html#the-import-statement  
```
import_stmt     ::=  "import" module ["as" name] ( "," module ["as" name] )*
                     
                     | "from" relative_module "import" identifier ["as" name]
                     ( "," identifier ["as" name] )*
                     
                     | "from" relative_module "import" "(" identifier ["as" name]
                     ( "," identifier ["as" name] )* [","] ")"
                     
                     | "from" module "import" "*"
module          ::=  (identifier ".")* identifier
relative_module ::=  "."* module | "."+
name            ::=  identifier
```

In [None]:
# https://docs.python.org/3/library/re.html
letter = 'a-zA-Z'
identifier = f'[{letter}_][{letter}0-9_]*'
module = fr'(?:{identifier}\.)*{identifier}'
module

'(?:[a-zA-Z_][a-zA-Z0-9_]*\\.)*[a-zA-Z_][a-zA-Z0-9_]*'

In [None]:
relative_module = fr'(?:\.*{module}|\.+)'
name = identifier

In [None]:
as_name  = fr'(?:\ +as\ +{name})'
as_name  = fr'{as_name}?(?:\ *,\ *{module}{as_name}?)*'

import_1 = fr'import\ +({module})({as_name})'

import_2 = fr'from\ +({relative_module})\ +import\ +({identifier}{as_name})'

as_name_s  = fr'(?:\s+as\s+{name})'
as_name_s  = fr'{as_name_s}?(?:\s*,\s*{module}{as_name_s}?)*'
import_3   = fr'from\ +({relative_module})\ +import\ *(\(\s*{identifier}{as_name_s}\s*,?\s*\))'

# NOTE: The docs say 'module', but in reality relative imports work as well.
import_4 = fr'from\ +({relative_module})\ +import\ *\*'

# NOTE: import_1 is not included, because it doesn't allow relative imports.
import_stmt = fr'(?:{import_2}|{import_3}|{import_4})'
import_stmt

'(?:from\\ +((?:\\.*(?:[a-zA-Z_][a-zA-Z0-9_]*\\.)*[a-zA-Z_][a-zA-Z0-9_]*|\\.+))\\ +import\\ +([a-zA-Z_][a-zA-Z0-9_]*(?:\\ +as\\ +[a-zA-Z_][a-zA-Z0-9_]*)?(?:\\ *,\\ *(?:[a-zA-Z_][a-zA-Z0-9_]*\\.)*[a-zA-Z_][a-zA-Z0-9_]*(?:\\ +as\\ +[a-zA-Z_][a-zA-Z0-9_]*)?)*)|from\\ +((?:\\.*(?:[a-zA-Z_][a-zA-Z0-9_]*\\.)*[a-zA-Z_][a-zA-Z0-9_]*|\\.+))\\ +import\\ *(\\(\\s*[a-zA-Z_][a-zA-Z0-9_]*(?:\\s+as\\s+[a-zA-Z_][a-zA-Z0-9_]*)?(?:\\s*,\\s*(?:[a-zA-Z_][a-zA-Z0-9_]*\\.)*[a-zA-Z_][a-zA-Z0-9_]*(?:\\s+as\\s+[a-zA-Z_][a-zA-Z0-9_]*)?)*\\s*,?\\s*\\))|from\\ +((?:\\.*(?:[a-zA-Z_][a-zA-Z0-9_]*\\.)*[a-zA-Z_][a-zA-Z0-9_]*|\\.+))\\ +import\\ *\\*)'

In [None]:
# https://docs.python.org/3/library/re.html
re_test = re.compile(fr"""
        ^              # start of the string
        (\ *)          # capturing group of any amount of whitespace (indenting)
        {import_stmt}  # definition for matching a module 
        \ *            # non-capturing whitespace
                       # TODO: match any remaining character in case of e.g. comments
        $              # end of the string
        """, re.VERBOSE | re.MULTILINE)

In [None]:
re_test.search('import numpy as np, matplotlib.pyplot, moduleaaaabbb as mod') # import_1

In [None]:
re_test.search('from numpy import array as arr, linalg.solve, module as mod').group() # import_2

'from numpy import array as arr, linalg.solve, module as mod'

In [None]:
re_test.search('from numpy import (abs, b as c, h,)').group() # import_3

'from numpy import (abs, b as c, h,)'

In [None]:
re_test.search('from numpy import *').group() # import_4
re_test.search('from . import *').group() # import_4

'from . import *'

In [None]:
_The_Name = 'numpy'
# import_stmt
def repl(match):
    print(match.groups())
    sp, n2, a2, n3, a3, n4 = match.groups()
    if n2:
        if n2 == _The_Name: return f'{sp}from <REL>{n2} import {a2}'
        else: return f'{sp}from {n2} import {a2}'
    elif n3:
        if n3 == _The_Name: f'{sp}from <REL>{n3} import {a3}'
        else: return f'{sp}from {n3} import {a3}'
    elif n4:
        if n4 == _The_Name: f'{sp}from <REL>{n4} import *'
        else: return f'{sp}from {n4} import *'

res = re_test.sub(repl, """
import numpy as np, matplotlib.pyplot, moduleaaaabbb as mod
# Nothing to see here
from numpy import array as arr, linalg.solve, module as mod
def function():
    pass
from numpy import (abs, b as c, h,)
from numpy import *
    from . import *
from numpy  import(
    abs
                  as a
    ,
                       absolute 
    as 
                  f
                  )""")
print(res)

('', 'numpy', 'array as arr, linalg.solve, module as mod', None, None, None)
('', None, None, 'numpy', '(abs, b as c, h,)', None)
('', None, None, None, None, 'numpy')
('    ', None, None, None, None, '.')
('', None, None, 'numpy', '(\n    abs\n                  as a\n    ,\n                       absolute \n    as \n                  f\n                  )', None)

import numpy as np, matplotlib.pyplot, moduleaaaabbb as mod
# Nothing to see here
from <REL>numpy import array as arr, linalg.solve, module as mod
def function():
    pass


    from . import *

