## Argument parsing

### prints

In [None]:
def report_error(err:Exception):
    "Print `err` to stdout, prefixed with the name of its exception class in square brackets."
    print(f'[{type(err).__name__}]: {err}')

In [None]:
def report_warning(warn:str):
    "Print the warning text `warn` to stdout, prefixed with '[Warning]: '."
    message = f'[Warning]: {warn}'
    print(message)

### Next Argument

In [None]:
def get_next_argument(args:list, name:str, cursor:int, suppress_error:bool=False) -> (bool, int, str):
    """Fetch the element that follows position `cursor` in `args`.

    Returns a `(success, cursor, value)` tuple. On success the returned cursor has advanced
    by one and `value` is the element at that new position. On failure (there is no such
    element) the cursor is returned unchanged together with an empty string, and, unless
    `suppress_error` is set, the problem is reported through `report_error`, naming `name`
    as the argument that is missing its value."""
    cursor_1 = cursor + 1
    try:
        value = args[cursor_1]
    except IndexError:
        if suppress_error: return False, cursor, ''
        report_error(SyntaxError(f"End of arguments reached. Missing a value for argument '{name}' at position {cursor_1}"))
        return False, cursor, ''
    return True, cursor_1, value

In [None]:
get_next_argument(['a', 'b', 'c'], 'b', 1)

(True, 2, 'c')

In [None]:
get_next_argument(['a', 'b', 'c'], 'c', 2)

[SyntaxError]: End of arguments reached. Missing a value for argument 'c' at position 3


(False, 2, '')

In [None]:
get_next_argument(['a', 'b', 'c'], 'c', 2, suppress_error=True)

(False, 2, '')

### Type conversion

In [None]:
def to_integer(value:str) -> (bool, int, float):
    """Try converting a str to int.

    Returns `(success, value, remainder)`:
    - on success `value` is the int and `remainder` is the fractional part that was cut off
      (`0.0` when the input was already written as an integer),
    - on failure `value` is the untouched input and `remainder` is `None`.

    Strings that are plain integer literals are converted directly, so large values are not
    rounded by a detour through `float`. Everything else is parsed as a float first and then
    truncated towards zero; 'inf' and 'nan' can't be truncated and count as failures."""
    if isinstance(value, str):
        # exact path: int() keeps every digit, float() would round anything above 2**53
        try: return True, int(value), 0.0
        except ValueError: pass
    try:
        f_value = float(value)
        int_value = int(f_value) # raises OverflowError for inf, ValueError for nan
    except (ValueError, TypeError, OverflowError): return False, value, None
    return True, int_value, f_value - int_value

In [None]:
to_integer('-2.1'), to_integer('nice')

((True, -2, -0.10000000000000009), (False, 'nice', None))

In [None]:
def to_float(value:str) -> (bool, float):
    """Try converting a str to float.

    Returns `(True, converted_value)` on success and `(False, value)` with the untouched
    input on failure. Special spellings accepted by `float` ('inf', 'nan', ...) count as
    successes."""
    # TODO: check if 'inf', 'nan', ...?
    # Only conversion failures are handled here; things like KeyboardInterrupt must propagate.
    try: return True, float(value)
    except (ValueError, TypeError, OverflowError): return False, value

In [None]:
to_float('-1e-3'), to_float('nan'), to_float('nice')

((True, -0.001), (True, nan), (False, 'nice'))

In [None]:
def to_bool(value:str) -> (bool, bool):
    """Try converting a str to bool.

    'True' and 'False' are recognized literally, anything else is cast to float, and then
    to bool (so only values equal to zero become `False`).
    Returns `(True, converted_value)` on success and `(False, value)` with the untouched
    input on failure."""
    if value == 'True' : return True, True
    if value == 'False': return True, False
    # Only conversion failures are handled here; things like KeyboardInterrupt must propagate.
    try: return True, bool(float(value))
    except (ValueError, TypeError, OverflowError): return False, value

In [None]:
to_bool('1'), to_bool('0'), to_bool('True'), to_bool('False'), to_bool('abc')

((True, True), (True, False), (True, True), (True, False), (False, 'abc'))

In [None]:
def to_unbounded_array(args:list, cursor:int) -> (bool, int, list):
    """Consume any number of values until either reaching the end of args,
    or until finding a value starting with '-', denoting the beginning of a new argument.

    `cursor` is the position of the keyword that introduced the array; the values are read
    from the positions after it.
    Return success, the cursor (pointing at the last consumed value, or unchanged if none
    was consumed), and the list of values (all left as `str`).
    Currently this can't actually fail... don't use unbounded lists kids."""
    values = []
    while True:
        try: value = args[cursor + 1] # peek at the next entry without committing to it
        except IndexError: break      # end of args
        # `startswith` instead of `value[0]`, so an empty string is a value and not a crash
        if value.startswith('-'): break # next command; leave the cursor in front of it
        values.append(value)
        cursor += 1
    return True, cursor, values

In [None]:
to_unbounded_array(['-list', '1', '2', '-3'], 0)

(True, 2, ['1', '2'])

In [None]:
def typify(type_or_value:object) -> (type, object):
    """Split the input into a `(type, value)` pair.
    A type is returned as `(the_type, None)`; any other object is treated as a value
    and returned as `(type(value), value)`."""
    if isinstance(type_or_value, type):
        return type_or_value, None
    return type(type_or_value), type_or_value

In [None]:
typify((int, int)*2)

(tuple, (int, int, int, int))

### Parsing

In [None]:
def parse_arguments(command:dict, comment:str) -> (bool, dict, dict):
    """Find, cast, and return the values described by `command` inside the given `comment`.

    `comment` is split on whitespace; every keyword has to start with a '-' and has to be a
    key of `command`. Returns `(success, result, is_set)`, where `result` starts out as a
    shallow copy of `command` and gets the parsed values written into it, and `is_set` maps
    every key of `command` to whether it was passed in `comment`.
    Parsing stops at the first error, which is reported via `report_error`."""
    # TODO: check that the type of all commands is supported ahead of time?
    # TODO: handle quoted arguments?
    args   = comment.split()
    result = command.copy() # copy needed?
    is_set = dict.fromkeys(command, False)
    state  = {'args': args, 'name': '', 'cursor': 0,
              'inside_array': False,}

    while state['cursor'] < len(args):
        arg = args[state['cursor']]
        if not arg.startswith('-'):
            report_error(SyntaxError(f"Argument {state['cursor']} does not start with a '-'."))
            return False, result, is_set
        arg = arg[1:] # remove '-'
        state['name'] = arg # TODO: check that len(arg) > 0?

        if arg not in command: # TODO: improve this msg. maybe: "is not part of the command"?
            report_error(SyntaxError(f"Argument {state['cursor']} ('{arg}') is not valid."))
            return False, result, is_set
        if is_set[arg]: # TODO: improve error msg. maybe: "this is the second time this argument was given"?
            report_error(SyntaxError(f"Argument {state['cursor']} ('{arg}') was given multiple times."))
            return False, result, is_set

        arg_type, arg_default = typify(command[arg])
        # the handler writes into result and moves state['cursor'] onto the last value it consumed
        if not handle_one_argument(result, state, arg_type, arg_default):
            return False, result, is_set # stop at first error
        is_set[arg] = True
        state['cursor'] += 1

    # everything given was parsed; now make sure nothing required is missing
    return check_is_set(result, is_set), result, is_set

In [None]:
def handle_one_argument(result:dict, state:dict, arg_type:type, arg_default:object) -> bool:
    """Parse the input args based on arg_type, and set arg_name in result to that value.

    `state` carries 'args' (the list of words), 'name' (the key to write in `result`),
    'cursor' (the current position in 'args') and 'inside_array' (whether a member of a
    predefined list / tuple is being parsed). `arg_default` is only used for `list` and
    `tuple`: `None` means unbounded, anything else is the template of a predefined array.
    Returns `True` on success. Problems are reported via `report_error` / `report_warning`."""
    # NOTE: state and result are modified from here and essentially treated as pointers
    args     = state['args']
    arg_name = state['name']
    success  = True
    if arg_type == str:
        # get the next argument, advance cursor, set success
        string_success, state['cursor'], value = get_next_argument(args, arg_name, state['cursor'])
        # TODO: how to handle strings that start with a '-'
        if string_success: result[arg_name] = value
        else: success = False

    elif arg_type == bool:
        if state['inside_array']:
            string_success, state['cursor'], value = get_next_argument(args, arg_name, state['cursor'])
            if string_success:
                bool_success, value = to_bool(value)
                if bool_success: result[arg_name] = value
                else:
                    # NOTE: messages are built from adjacent literals; a backslash continuation
                    # inside the string would put the indentation into the message.
                    report_error(ValueError(f"Value of argument {state['cursor']-1} ('{arg_name}') "
                                            f"was not convertable to bool. Please use 'True', 'False', '0', or '1'. (It was '{value}')"))
                    success = False
            else: success = False
        # special case where supplying the argument means True and not supplying it means use the default (False)
        else: result[arg_name] = True

    elif arg_type == int:
        # get the next argument, cast to int, check for remainder, advance cursor, set success
        string_success, state['cursor'], value = get_next_argument(args, arg_name, state['cursor'])
        if not string_success: return False
        int_success, value, remainder = to_integer(value)
        if int_success:
            result[arg_name] = value
            if remainder:
                report_warning(f"Junk on the end of the value for int argument "
                               f"{state['cursor']-1} ('{arg_name}'): {remainder}")
        else:
            report_error(ValueError(f"Value of argument {state['cursor']-1} ('{arg_name}') "
                                    f"was not an int. (It was '{value}')"))
            success = False

    elif arg_type == float:
        # get the next argument, cast to float, advance cursor, set success
        string_success, state['cursor'], value = get_next_argument(args, arg_name, state['cursor'])
        if not string_success: return False
        float_success, value = to_float(value)
        if float_success: result[arg_name] = value
        else:
            report_error(ValueError(f"Value of argument {state['cursor']-1} ('{arg_name}') "
                                    f"was not a float. (It was '{value}')"))
            success = False

    elif arg_type == list or arg_type == tuple:
        if arg_default is None: # unbounded list / tuple
            if state['inside_array']:
                report_error(SyntaxError("Using an unbounded list or tuple inside an array is not supported."))
                return False

            array_success, state['cursor'], value = to_unbounded_array(args, state['cursor'])
            if array_success: # NOTE: currently this can't actually fail... don't use unbounded lists kids.
                result[arg_name] = arg_type(value)
            else: success = False

        else: # predefined list
            # every member is parsed by a recursive call, using its own state and a one-entry result dict
            s = {'args': args, 'name': 'v', 'cursor': state['cursor'],
                 'inside_array': True}
            value = []
            for i, x in enumerate(arg_default):
                t, d = typify(x)
                n = f'{arg_name}[{i}]'
                s['name'] = n
                r = {n:d}
                member_success = handle_one_argument(r, s, t, d)
                if member_success: value.append(r[n])
                else: # TODO: Improve error message
                    # report_error(SyntaxError(f"Array argument {state['cursor']} ('{arg_name}') was not passed correctly."))
                    return False
            # only commit the cursor once all members were parsed successfully
            state['cursor'] = s['cursor']
            result[arg_name] = arg_type(value)

    else:
        report_error(TypeError(f"Argument {state['cursor']} ('{arg_name}') is of unsupported type {arg_type}."))
        success = False

    return success

In [None]:
def check_is_set(result:dict, is_set:dict) -> bool:
    """Verify that every argument that was not passed can fall back to a default.

    For each member that is not marked in `is_set`: a bare `bool` type becomes `False`,
    any other bare type is reported as missing, and a predefined list / tuple is checked
    member by member (recursively) and then rebuilt in `result`.
    Returns `False` if any required value is missing."""
    all_ok = True
    for member in is_set:
        if is_set[member]: continue
        arg_type, arg_default = typify(result[member])
        if arg_default is None:
            if arg_type == bool: # NOTE: Special case, not setting a boolean means it's False.
                result[member] = False # TODO: set is_set as well? what's the use-case here?
            else:
                report_error(ValueError(f"Argument '{member}' has not been set, and no default value was given."))
                all_ok = False
        elif arg_type in (list, tuple): # this is a bounded list
            names      = [f'{member}[{i}]' for i, _ in enumerate(arg_default)]
            sub_result = dict(zip(names, arg_default))
            sub_is_set = dict.fromkeys(sub_result, False)
            if check_is_set(sub_result, sub_is_set): # re-set result
                result[member] = arg_type(sub_result[n] for n in names)
            else: all_ok = False
    return all_ok

### Documentation

This argument parser is largely inspired by these two videos by Jonathan Blow.
>[Part 1](https://youtu.be/TwqXTf7VfZk)  
>[Part 2](https://youtu.be/pgiVrhsGkKY)

pt.1 @ 3:12:50 complete code sweep

This module basically provides only one function:  
```python
def parse_arguments(command:dict, comment:str) -> (bool, dict, dict)
```  

It takes one __"command" dictionary__, and a __"comment" string__.  

#### __The command__

is a simple key-value collection of expected flags, where an attribute name maps to either a type, or a default value, from which the type is inferred.  
```python
command = {
    'arg1':bool,
    'arg2':str,
    'arg3':32,
    'arg4':3.14,
}
```

#### __The comment__
is just a list of space-separated arguments, with words starting with a minus (`'-'`) denoting a keyword, and anything without a minus as the first character being a value to the previous keyword.  
```python
'-name bob -age 99 -celsius 30.5 -thirsty'
```  
is a valid string for the command  
```python
{
    'name'   : str,
    'weather': 'sunny',
    'celsius': float,
    'age'    : int,
    'thirsty': bool,
    'tired'  : bool
}
```

#### __The primitive types:__
Currently the following primitive types are supported:  
- `str`
    - a `str` argument requires one value.
    - e.g.: `-weather sunny`
- `bool`
    - a `bool` argument requires no values. setting the flag automatically sets the value to `True`.
    - writing `bool` is the same as using the default value `False`.
    - e.g.: `-is_wet`
- `int`
    - a `int` argument requires one value.
    - the value will first be cast to `float`, and then to `int`, partly due to how python works, and also to check for a remainder in case the provided value was actually in a float format.
    - e.g.: `-age 99`, `-negative -1`
- `float`
    - a `float` argument requires one value.
    - the value has to be castable to `float`. what is and what isn't a float can be surprising, so you should check the [casting rules](https://stackoverflow.com/a/20929983/) beforehand.
    - e.g.: `-pi 3.14`, `-negative -1.0`, `-weird nan`, `-large inf`, `-small -inf`
  
Any of these types can be declared either by just using the `type` directly, or by giving a default value of the specific `type`. All arguments that use the `type` directly have to be passed in the comment. If a default value is specified, or if the `type` is `bool`, the argument does not have to be passed in the comment, and instead the `result` will simply contain the default value. This changes with composite types (see below). Whether or not an argument was passed in the comment can be seen by looking at the `is_set` return value (see below).

  
##### __The composite types__
`list` and `tuple` (referred to as 'array' when it can be either one of them) are also supported; however, due to Python's lack of static typing, they have slightly different semantics.  

Specifying only the type `list` or `tuple` will result in an 'unbounded array' of that type, meaning that all values following the keyword will be added to the array, until either the end of arguments is reached, or a value starts with a minus (`'-'`), which denotes the start of the next argument. All values of the array will be of type `str`. This kind of argument should be used with caution (e.g. negative values).  
```python
{
    'unbounded_list' : list,
    'unbounded_tuple': tuple,
}
```  

The other, better way to use arrays is to actually create an array containing the types, default values, and ordering you want the values to have. This can get arbitrarily complex, mixing and matching any supported primitive type you want. The only thing not allowed, is using an unbounded array (see above).  
All values will be cast to the corresponding type using all the same semantics as if they were single values (see above). The only exception to that is the `bool` type, where the value has to be either `'True'`, `'False'`, or interpretable as a `float`, which will then be cast to a `bool`. This means that e.g. `'0.0'` will result in `False`, and `'123'` will result in `True` (careful, check the [casting rules](https://docs.python.org/3.3/library/stdtypes.html?highlight=frozenset#truth-value-testing) first).
```python
{
    'arg1': [int]*5,
    'arg2': (3.14, 'pi', bool),
    'arg3': (bool, str, 123)*2,
    'arg4': [[0]*3, [1]*3, [str]*3],
    'arg5': [str, int, bool, True, [1, '2', 3, bool], (2.1, float)]
}
```

#### __The return value__
is a three-tuple of `(success, result, is_set)`.  
- `success` is a `bool`, saying whether or not parsing was successful. If it is `False`, the other two arguments are not guaranteed to be valid. There will be an error message with details on what happened to help debugging.  
- `result` is a `dict` with exactly the same keys as the input `command`, with the corresponding values set to whatever was extracted from the comment. In cases where `success` is `False`, this might only be partially filled out, so `success` should always be checked.
- `is_set` is a `dict`, which also contains exactly the same keys as the input `command`, this time mapping to a `bool`, which is `True` if `comment` contains a value for the particular argument, and `False` otherwise. In cases where a default value is given in `command`, the same rule applies. Meaning that only if the default was overwritten by an argument in `comment` will the `is_set` value be `True`. This holds even for `bool`s, which default to `False` if no default was given.

### use-case

### Examples

In [None]:
command = {
    'test'  : bool,
    'sunny' : False,
    'toast' : str,
    'shots' : int,
    'scale' : float,
    'scoops': [str, int, bool, [1, 2, 3, bool], (float, float)],
    # 'valid' : (bool, bool),
    'valid' : (1, 1.23, bool, 'hi', [1, 2]),
    'nah'   : 'boi',
    'sweet' : bool,
    'nr'    : int,
    'list'  : list
}

comment = '-sunny -toast jelly -shots 25 -scale 69105.1234 -test -list 2 -scoops a 1 0 5 6 7 False 3.0 2.1 -nr 21'
# comment = '-sunny -toast jelly -shots 25 -scale 69105.1234 -test -nr 1'
parse_arguments(command, comment)

(True,
 {'test': True,
  'sunny': True,
  'toast': 'jelly',
  'shots': 25,
  'scale': 69105.1234,
  'scoops': ['a', 1, False, [5, 6, 7, False], (3.0, 2.1)],
  'valid': (1, 1.23, False, 'hi', [1, 2]),
  'nah': 'boi',
  'sweet': False,
  'nr': 21,
  'list': ['2']},
 {'test': True,
  'sunny': True,
  'toast': True,
  'shots': True,
  'scale': True,
  'scoops': True,
  'valid': False,
  'nah': False,
  'sweet': False,
  'nr': True,
  'list': True})

In [None]:
%timeit parse_arguments(command, comment)

46.8 µs ± 59.2 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


## Imports

In [None]:
# export
from collections import namedtuple, defaultdict
import os
import re
from nbdev_rewrite.imports import *

from inspect import signature

import ast
from ast import iter_fields, AST
import _ast

## The normal stuff

In [None]:
def init_config(lib_name='nbdev_rewrite', user='flpeters', nbs_path='.'):
    "Create the config file via `create_config`, unless `Config().config_file` already exists."
    if Config().config_file.exists(): return
    create_config(lib_name, user, nbs_path=nbs_path)
init_config()

In [None]:
def init_lib():
    """Make sure the module folder from the config exists and contains an `__init__.py`.
    If either one is missing, the folder is created (including parents) and `__init__.py`
    is (re)written with just the `__version__` taken from the config."""
    C = Config()
    init_file = C.lib_path/'__init__.py'
    # nothing to do if both are there; the module *should* already exist
    if C.lib_path.exists() and init_file.exists(): return
    C.lib_path.mkdir(parents=True, exist_ok=True)
    with init_file.open('w') as f:
        f.write(f'__version__ = "{C.version}"\n')
init_lib()

In [None]:
_reserved_dirs = (Config().lib_path, Config().nbs_path, Config().doc_path)
def crawl_nbs(path:Path=None, recurse:bool=True) -> list:
    """Find the ipynb files to convert.

    NOTE: this is a generator; it lazily yields `Path` objects and does not build a list.
    - `path` may be `None` (use `Config().nbs_path`), a single file or directory given as
      `Path` or `str`, or a list / tuple of those.
    - A path that points at a file is yielded as is, whatever its extension.
    - Inside a directory only '.ipynb' files are yielded; entries starting with '.' or '_'
      are skipped, and sub-directories are only entered if `recurse` is set and they are
      not one of `_reserved_dirs`."""
    if path is None: path = Config().nbs_path
    if isinstance(path, (list, tuple)):
        for p in path: yield from crawl_nbs(p, recurse)
        return
    path = Path(path) # plain strings would otherwise fail on `.is_file()`
    if path.is_file():
        yield path
        return
    for p in path.iterdir():
        f = p.name
        if f.startswith(('.', '_')): continue
        if p.is_file():
            if f.endswith('.ipynb'): yield p
        elif p.is_dir() and recurse and p not in _reserved_dirs:
            yield from crawl_nbs(p, recurse)
list(crawl_nbs())

[WindowsPath('D:/Projects/GitHub/nbdev_rewrite/00_export.ipynb'),
 WindowsPath('D:/Projects/GitHub/nbdev_rewrite/01_helpers.ipynb'),
 WindowsPath('D:/Projects/GitHub/nbdev_rewrite/02_export_v2.ipynb'),
 WindowsPath('D:/Projects/GitHub/nbdev_rewrite/03_export_v3.ipynb'),
 WindowsPath('D:/Projects/GitHub/nbdev_rewrite/99_index.ipynb'),
 WindowsPath('D:/Projects/GitHub/nbdev_rewrite/sub/lalalala.ipynb')]

In [None]:
def read_nb(fname:Path) -> nbformat.notebooknode.NotebookNode:
    "Read the notebook file `fname` (utf8) and parse it with `nbformat` as a version 4 notebook."
    with open(Path(fname), 'r', encoding='utf8') as f:
        raw = f.read()
    return nbformat.reads(raw, as_version=4)
len(read_nb('03_export_v3.ipynb')['cells'])

98

In [None]:
def module2path(module:str) -> str:
    "Turn a dotted python module name into a relative path, using the os specific path separator."
    return module.replace('.', os.path.sep)
module2path('test.abc')

'test\\abc'

In [None]:
# export
def _notebook2script(fname, silent=False):
    """Work-in-progress conversion of a single notebook.
    Currently this only prints the file name, loads the cells, and computes the cell
    separator from the 'cell_spacing' config entry; nothing is exported and `None` is returned."""
    fname = Path(fname)
    print(fname.name)
    cells = read_nb(fname)['cells']
    spacing = int(Config().get('cell_spacing', 1))
    sep = '\n' * (max(spacing, 0) + 1) # not used yet
    for _ in cells:
        # scan for
        # default_exp
        # export
        # hide
        # put all the stuff in datastructure, which should be thread safe
        pass

In [None]:
def notebook2script(fname=None, silent=False, to_dict=False):
    """Run `_notebook2script` on every notebook that `crawl_nbs(fname)` finds.
    Returns the list of per-file results. `to_dict` is not used yet."""
    # init target module directory
    init_lib()
    # merge_exports(exports)
    # if fname is a file, convert only that file, if possible.
    # if its None, use Config() directory
    # if its a directory,
        # crawl source directory recursively to find all files that should be converted
    # create thread/process pool for processing all files in parallel
    # execute compilation on each of the files, resulting in a dataformat representing the converted file.
    # merge all files, based on cross-exporting stored in dataformat
    # TODO: handle cross-exporting if the targeted file already exists, but wasn't compiled from scratch
    # probably should force a recompile of that file as well
    # write files to disk
    return [_notebook2script(file, silent) for file in crawl_nbs(fname)]
notebook2script()

00_export.ipynb
01_helpers.ipynb
02_export_v2.ipynb
03_export_v3.ipynb
99_index.ipynb
lalalala.ipynb


[None, None, None, None, None, None]

## Develop Parallelism

### Prefetcher [working]

In [None]:
from parallel import BackgroundGenerator, prefetch

In [None]:
@prefetch(max_prefetch=4)
def file_generator():
    "Yield every notebook found by `crawl_nbs()`, parsed with `read_nb`."
    yield from (read_nb(nb_path) for nb_path in crawl_nbs())

In [None]:
import time

In [None]:
# Measure how long the consumer waits for `file_generator` to hand over each notebook.
# Only the waiting time is accumulated in `tt`; the simulated work (sleep) is excluded.
tt = 0
t0 = time.time()
for x in file_generator():
    t1 = time.time()
    tt += t1 - t0 # time between asking for the next item and receiving it
    time.sleep(.5) # work
    print(len(x['cells']))
    t0 = time.time() # restart the clock once the work is done
print(round(tt, 6))

90
2
100
98
31
2
0.025006


### Basic Threading

In [None]:
import threading 
import time
  
# Two workers printing at different intervals, to show their output interleaving.
def print_hello():
    "Print 'Hello' four times, sleeping 0.5 seconds before each print."
    for i in range(4):
        time.sleep(0.5)
        print("Hello")
        
def print_hi(): 
    "Print 'Hi' four times, sleeping 0.7 seconds before each print."
    for i in range(4): 
        time.sleep(0.7)
        print("Hi") 

# Start both functions in their own thread. The threads are not joined here.
t1 = threading.Thread(target=print_hello)  
t2 = threading.Thread(target=print_hi)  
t1.start()
t2.start()

Hello
Hi
Hello
Hi
Hello
Hello
Hi
Hi


### multiprocessing

For multiprocessing to work, the function that is supposed to be executed has to be importable, i.e. it has to live in a .py file.

In [None]:
# scan for comments in all cells
# check for `export`, `hide`, and `meta` comments
# if any `meta`:
#     execute `meta`
#     if control inversion:
#         pass for now
#     check for potential new comments due to `meta` execution
# parse or discard all remaining comments
# execute commands
# aggregate results back in main process
# return

In [None]:
from parallel import f

In [None]:
[f(x) for x in range(10)]

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [None]:
from multiprocessing import Pool
from time import sleep

In [None]:
def a(x):
    "Return `x` raised to the third power."
    return pow(x, 3)

In [None]:
pool = ProcessPoolExecutor(max_workers=4)

In [None]:
concurrent.futures.ProcessPoolExecutor??

In [None]:
for file in file_generator():
    print(len(file['cells']))

90
2
100
28
31
2


In [None]:
with ProcessPoolExecutor(max_workers=4) as pool:
    print(list(pool.map(f, range(10))))

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]


In [None]:
# Demo of the `multiprocessing.Pool` API, using `f` (imported from `parallel` above).
if __name__ == '__main__':
    # start 4 worker processes
    with Pool(processes=4) as pool:

        # print "[0, 1, 4,..., 81]"
        print(pool.map(f, range(10)))

        # print same numbers in arbitrary order
        for i in pool.imap_unordered(f, range(10)):
            print(i)

        # evaluate "f(10)" asynchronously
        res = pool.apply_async(f, [10])
        print(res.get(timeout=1))             # prints "100"

        # make worker sleep for 10 secs
        # res = pool.apply_async(sleep, [10])
        # print(res.get(timeout=1))             # raises multiprocessing.TimeoutError

    # exiting the 'with'-block has stopped the pool

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
0
1
4
9
16
25
36
49
64
81
100


### Coroutines and generators

https://stackoverflow.com/questions/19302530/python-generator-send-function-purpose

In [None]:
def coroutine():
    "Yield the numbers 1 to 9, printing whatever gets sent back in after each one."
    for i in range(1, 10):
        received = yield i
        print(f'From generator {received}')
c = coroutine()
c.send(None)
try:
    while True: print(f'From user {c.send(1)}')
except StopIteration: pass

From generator 1
From user 2
From generator 1
From user 3
From generator 1
From user 4
From generator 1
From user 5
From generator 1
From user 6
From generator 1
From user 7
From generator 1
From user 8
From generator 1
From user 9
From generator 1


In [None]:
# Unaffected by send
def double_number(number):
    """Endlessly double `number`, yielding it after every doubling.
    Values passed in with `send` have no effect, because the result of the `yield`
    expression is never used."""
    while True:
        number *=2 
        yield number

c = double_number(4)
print(c.send(None))
print(next(c))
print(next(c))
print(c.send(8))
print(c.send(8))
print(c.send(8))

8
16
32
64
128
256


In [None]:
# Works with send
def double_number(number):
    """Endlessly double `number` and yield the result.
    Whatever the caller passes to `send` becomes the new `number`, which is what gets
    doubled on the next round."""
    while True:
        number *= 2
        number = yield number # the value handed to `send` replaces number
        
c = double_number(4)
print(c.send(None))
print(c.send(5)) #10
print(c.send(1500)) #3000
print(c.send(3)) #6

8
10
3000
6


In [None]:
def double_inputs():
    """Alternate between two yields: a bare one that receives a value via `send`,
    and one that hands back that value times two."""
    while True:
        received = yield
        yield received * 2

gen = double_inputs()
print(next(gen))       # run up to the first yield
print(gen.send(10))    # goes into 'x' variable

print(next(gen))       # run up to the next yield
print(gen.send(6))     # goes into 'x' again

print(next(gen))       # run up to the next yield
print(gen.send(94.3))  # goes into 'x' again

None
20
None
12
None
188.6


In [None]:
# @defer.inlineCallbacks
# def doStuff():
#     result = yield takesTwoSeconds()
#     nextResult = yield takesTenSeconds(result * 10)
#     defer.returnValue(nextResult / 10)

In [None]:
# def doStuff():
#     returnDeferred = defer.Deferred()
#     def gotNextResult(nextResult):
#         returnDeferred.callback(nextResult / 10)
#     def gotResult(result):
#         takesTenSeconds(result * 10).addCallback(gotNextResult)
#     takesTwoSeconds().addCallback(gotResult)
#     return returnDeferred

### Combining multithreading, multiprocessing and generators

In [None]:
# TODO(florian): Only look for 0 indent comments?
def iter_comments(src:str, cell_nr:int, pure_comments_only:bool=True, line_limit=None):
    """Detect all comments in a piece of code, excluding those that are a part of a string.

    `src` is scanned line by line (only the first `line_limit` lines, if given) and character
    by character, while keeping track of whether the scan is currently inside a
    single-quoted (`in_sstr`) or a triple-quoted (`in_lstr`) string.
    Yields `(text, (line_index, column))` for each '#' found outside of a string:
    - with `pure_comments_only`, only lines where nothing but whitespace comes before
      the '#' are reported, and `text` is the whole line.
    - otherwise every comment is reported, and `text` is the line from the '#' onwards.
    Raises a SyntaxError if a run of quotes outside of a string is neither 1 nor 3 long;
    `cell_nr` is only used for that error message."""
    in_lstr = in_sstr = False
    # count: length of the current run of identical quote characters
    # quote: the quote character that opened the current / last string
    count, quote = 1, ''
    for i, line in enumerate(src.splitlines()[:line_limit]):
        # is_pure: only whitespace (or '#') seen so far on this line
        # escape : the previous character was an unescaped backslash inside a string
        is_pure, escape, prev_c = True, False, '\n'
        for j, c in enumerate(line):
            # we can't break as soon as not is_pure, because we have to detect if a multiline string beginns
            if is_pure and (not (c.isspace() or c == '#')): is_pure = False
            if (in_sstr or in_lstr):
                # assert (in_sstr and not in_lstr) or (in_lstr and not in_sstr)
                if escape: count = 0 # an escaped character can't be part of a closing run of quotes
                else:
                    if (c == quote):
                        count = ((count + 1) if (c == prev_c) else 1)
                        if in_sstr: in_sstr = False # a single matching quote closes a single-quoted string
                        elif (in_lstr and (count == 3)): count, in_lstr = 0, False # three in a row close a triple-quoted string
                escape = False if escape else (c == '\\')
            else:                    
                if (c == '#'):
                    if (pure_comments_only and is_pure): yield (line, (i, j))
                    elif (not pure_comments_only):       yield (line[j:], (i, j))
                    break # the rest of the line belongs to the comment
                elif c == "'" or c == '"':
                    count = ((count + 1) if (c == prev_c) else 1)
                    if count == 1: in_sstr = True
                    # a third quote directly after an opened-and-closed empty string opens a triple-quoted string
                    elif count == 3: count, in_lstr = 0, True
                    else: raise SyntaxError(f'Unexpected quote repetition count: {count} Should be either 1 or 3. Cell_nr: {cell_nr} Line:{i}/{j}')
                    quote = c
            prev_c = c

In [None]:
class Cell:
    """Wrapper around one notebook cell (a dict with 'source' and 'cell_type').
    Iterating over a `Cell` yields its comments as found by `iter_comments`; once an
    iteration has run to completion, the comments are cached and replayed from then on."""
    def __init__(self, cell:dict, cell_nr:int):
        # cell data
        self.cell_nr     = cell_nr
        self.source_code = cell['source']
        self.cell_type   = cell['cell_type']
        # file state
        self.default_export = None
        # cell state
        self.names     = set()
        self._comments = None # filled by iter_ruptor after a complete pass
        self.export    = False
        self.internal  = False

    def iter_ruptor(self, gen):
        "Pass every item of `gen` through, and store all of them in `_comments` once `gen` is exhausted."
        seen = []
        for item in gen:
            seen.append(item)
            yield item
        self._comments = seen

    def __iter__(self):
        if self._comments is not None: return iter(self._comments)
        return self.iter_ruptor(iter_comments(self.source_code, self.cell_nr))

    def stage_one(self):
        "Decode every comment of this cell. The decoded commands are not used yet."
        # NOTE: `decode_comment` is not defined in this class as shown here.
        for comment in self:
            self.decode_comment(comment)

    def __repr__(self):
        header = f'{self.cell_type}, {self.export}, {self.internal}, {self.default_export},'
        return f'{header}\n{self.source_code}'

In [None]:
cell = Cell(read_nb('03_export_v3.ipynb')['cells'][0], 1)

In [None]:
iter(cell)

<list_iterator at 0x1bf7ff2b688>

In [None]:
for c in cell:
    print(c)

('# export', (0, 0))


In [None]:
list(iter(cell))

[]

In [None]:
class E:
    """Two-stage processing of the cells of one file.
    `run` is a generator: it performs stage one, yields `(meta_cells, default_export)`,
    and expects to be sent back a `(metas, default_exports)` pair for stage two."""
    def __init__(self, file_path, cells):
        self.file_path = file_path
        self.cells = cells
        self.default_export = None
        self.meta_cells, self.export_cells = {}, {}

    def stage_one(self):
        "Sort the cells into `meta_cells` and `export_cells` (keyed by cell index), based on `find_builtins`."
        for idx, cell in enumerate(self.cells):
            is_meta, is_export, is_internal, _ = find_builtins(cell)
            if is_meta:   self.meta_cells[idx] = cell
            if is_export: self.export_cells[idx] = (cell, is_internal)

    def stage_two(self, metas, default_exports):
        "Merge in the externally provided metas and default exports, then run the metas, if any."
        # NOTE: `merge_metas`, `merge_default_exports` and `run_meta` are not defined in this class as shown here.
        self.meta_cells = self.merge_metas(self.meta_cells, metas)
        self.default_export = self.merge_default_exports(self.default_export, default_exports)
        if not self.meta_cells: return
        self.run_meta()

    def run(self):
        self.stage_one()
        received = yield (self.meta_cells, self.default_export)
        metas, default_exports = received
        self.stage_two(metas, default_exports)

    def main(self):
        # communicate with main process
        pass

In [None]:
def partial_cells(fname, cells):
    """Drive an `E` instance for `cells` through both of its stages.

    `E` itself is not an iterator, so the generator returned by `E.run()` is what gets
    advanced: `next` runs stage one and hands back `(metas, default_exports)`, and `send`
    passes a pair back in (as a single tuple, since `send` takes exactly one argument).
    Returns whatever the generator produces after that: its next yielded value, or, if it
    finishes instead, its return value."""
    runner = E(fname, cells).run()
    metas, default_exports = next(runner)
    # communicate with main process
    try: return runner.send((metas, default_exports))
    except StopIteration as done: return done.value # the generator finished instead of yielding again
    

#### per file

In [None]:
def export_file(file):
    """Sketch of the per-file export: split the cells of `file` into chunks, process every
    chunk, collect the data that has to be shared between chunks, push it back to all of
    them, and return the completed results.

    NOTE(review): this looks like pseudo-code. `chunkify` is not defined in the visible part
    of the file, `partial_cells` takes two parameters (`fname`, `cells`) but is called with
    one, and a plain `dict` has no `set_data` method. Confirm the intended API first."""
    processes = []
    for chunk in chunkify(file['cells']):
        processes.append(partial_cells(chunk))
    dist_data = {}
    for p in processes:
        dist_data.set_data(p.get_dist())
    for p in processes: p.push_dist(dist_data)
    return [p.complete() for p in processes]

In [None]:
for cell in cells:
    cell = Cell(cell)
    

#### per run

In [None]:
def do_file(file):
    "Print the number of cells in the notebook `file`."
    n_cells = len(file['cells'])
    print(n_cells)

In [None]:
for file in file_generator(): do_file(file)

90
2
100
48
31
2
