## Imports

In [None]:
# export
from collections import namedtuple, defaultdict
import os
import re
from nbdev_rewrite.imports import *

from inspect import signature

import ast
from ast import iter_fields, AST
import _ast

## The normal stuff

In [None]:
def init_config(lib_name='nbdev_rewrite', user='flpeters', nbs_path='.'):
    "Write a config file via `create_config`, unless `Config().config_file` already exists."
    if Config().config_file.exists(): return
    create_config(lib_name, user, nbs_path=nbs_path)
init_config()

In [None]:
def init_lib():
    "Make sure the library folder and its `__init__.py` exist, creating whatever is missing."
    cfg = Config()
    init_file = cfg.lib_path/'__init__.py'
    # Nothing to do when both the folder and its `__init__.py` are already there.
    if cfg.lib_path.exists() and init_file.exists(): return
    cfg.lib_path.mkdir(parents=True, exist_ok=True)
    with init_file.open('w') as f:
        f.write(f'__version__ = "{cfg.version}"\n')
init_lib()

In [None]:
# Directories `crawl_nbs` never descends into: the config's `lib_path`, `nbs_path` and `doc_path`.
# Evaluated once, when this cell runs.
_reserved_dirs = (Config().lib_path, Config().nbs_path, Config().doc_path)
def crawl_nbs(path:Path=None, recurse:bool=True):
    """Lazily yield the notebook files that should be converted.

    path: a file or directory (`Path` or `str`), or a list/tuple of those.
        `None` falls back to `Config().nbs_path`.
    recurse: descend into sub-directories when True.

    This is a generator; it yields `Path` objects.
    A file that is passed in explicitly is yielded as-is, whatever its suffix.
    Inside a directory only `*.ipynb` files are yielded, entries whose name starts
    with '.' or '_' are skipped, and directories listed in `_reserved_dirs` are not entered.
    """
    if path is None: path = Config().nbs_path
    if isinstance(path, (list, tuple)):
        for p in path: yield from crawl_nbs(p, recurse)
        return
    path = Path(path) # plain strings are accepted as well
    if path.is_file():
        yield path
        return
    for p in path.iterdir():
        f = p.name
        if f.startswith(('.', '_')): continue # hidden / private entries
        if p.is_file():
            if f.endswith('.ipynb'): yield p
        elif recurse and p.is_dir():
            if p not in _reserved_dirs: yield from crawl_nbs(p, recurse)
list(crawl_nbs())

[WindowsPath('D:/Projects/GitHub/nbdev_rewrite/00_export.ipynb'),
 WindowsPath('D:/Projects/GitHub/nbdev_rewrite/01_helpers.ipynb'),
 WindowsPath('D:/Projects/GitHub/nbdev_rewrite/02_export_v2.ipynb'),
 WindowsPath('D:/Projects/GitHub/nbdev_rewrite/03_export_v3.ipynb'),
 WindowsPath('D:/Projects/GitHub/nbdev_rewrite/99_index.ipynb'),
 WindowsPath('D:/Projects/GitHub/nbdev_rewrite/sub/lalalala.ipynb')]

In [None]:
def read_nb(fname:Path) -> nbformat.notebooknode.NotebookNode:
    "Load the notebook stored at `fname` (read as UTF-8) and parse it as nbformat version 4."
    with Path(fname).open('r', encoding='utf8') as nb_file:
        raw_json = nb_file.read()
    return nbformat.reads(raw_json, as_version=4)
len(read_nb('03_export_v3.ipynb')['cells'])

98

In [None]:
def module2path(module:str) -> str:
    "Turn a dotted python module name into a relative path, using the OS-specific path separator."
    return module.replace('.', os.path.sep)
module2path('test.abc')

'test\\abc'

In [None]:
# export
def _notebook2script(fname, silent=False):
    "Work in progress: prints the notebook's file name, loads its cells and reads the spacing setting. `silent` is unused so far."
    fname = Path(fname)
    print(fname.name)
    cells = read_nb(fname)['cells']
    cfg = Config()
    # `sep` is one newline more than the (non-negative) `cell_spacing` setting.
    n_spacing = max(int(cfg.get('cell_spacing', 1)), 0)
    sep = '\n' * (n_spacing + 1)
    for cell in cells:
        # TODO: scan each cell for `default_exp`, `export` and `hide`,
        # and put all the stuff in a datastructure, which should be thread safe
        pass

In [None]:
def notebook2script(fname=None, silent=False, to_dict=False):
    """Run `_notebook2script` on every notebook selected by `fname` and return the list of results.

    fname: forwarded to `crawl_nbs`; `None` makes it fall back to the `Config()` notebook directory.
    silent: forwarded to `_notebook2script`.
    to_dict: currently unused.

    Planned pipeline (not implemented yet):
    - if `fname` is a file, convert only that file, if possible; if it is a directory,
      crawl it recursively to find all files that should be converted
    - create a thread/process pool for processing all files in parallel
    - compile each file into a dataformat representing the converted file
    - merge all files, based on the cross-exporting stored in that dataformat
    - write the files to disk
    TODO: handle cross-exporting if the targeted file already exists, but wasn't compiled from scratch.
          Probably should force a recompile of that file as well.
    """
    init_lib() # init target module directory
    exports = [_notebook2script(nb_file, silent) for nb_file in crawl_nbs(fname)]
    # merge_exports(exports)
    return exports
notebook2script()

00_export.ipynb
01_helpers.ipynb
02_export_v2.ipynb
03_export_v3.ipynb
99_index.ipynb
lalalala.ipynb


[None, None, None, None, None, None]

## Develop Parallelism

### Prefetcher [working]

In [None]:
from parallel import BackgroundGenerator, prefetch

In [None]:
@prefetch(max_prefetch=4)
def file_generator():
    "Yield every notebook found by `crawl_nbs()`, parsed with `read_nb`."
    # NOTE(review): `prefetch` comes from the local `parallel` module; presumably it reads ahead in the background - confirm.
    yield from map(read_nb, crawl_nbs())

In [None]:
import time

In [None]:
# Measure how long the consumer waits on `file_generator`:
# `tt` sums the time between finishing one item's work and receiving the next item.
tt = 0
t0 = time.time()
for x in file_generator():
    t1 = time.time()
    tt += t1 - t0 # waiting time for this item only
    time.sleep(.5) # work
    print(len(x['cells']))
    t0 = time.time() # restart the clock after the simulated work
print(round(tt, 6))

90
2
100
98
31
2
0.025006


### Basic Threading

In [None]:
import threading 
import time
  
def print_hello():
    "Print 'Hello' four times, sleeping 0.5 seconds before each print."
    remaining = 4
    while remaining:
        time.sleep(0.5)
        print("Hello")
        remaining -= 1
        
def print_hi():
    "Print 'Hi' four times, sleeping 0.7 seconds before each print."
    remaining = 4
    while remaining:
        time.sleep(0.7)
        print("Hi")
        remaining -= 1

# Run both printers concurrently, one thread each; their output interleaves.
# The threads are started but never joined here.
t1 = threading.Thread(target=print_hello)  
t2 = threading.Thread(target=print_hi)  
t1.start()
t2.start()

Hello
Hi
Hello
Hi
Hello
Hello
Hi
Hi


### multiprocessing

For multiprocessing to work, the function that is supposed to be executed has to be importable, i.e. defined in a .py file.

In [None]:
# scan for comments in all cells
# check for `export`, `hide`, and `meta` comments
# if any `meta`:
#     execute `meta`
#     if control inversion:
#         pass for now
#     check for potential new comments due to `meta` execution
# parse or discard all remaining comments
# execute commands
# aggregate results back in main process
# return

In [None]:
from parallel import f

In [None]:
# Serial baseline: call the imported `f` directly, without any pool.
[f(x) for x in range(10)]

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [None]:
from multiprocessing import Pool
from time import sleep

In [None]:
def a(x):
    "Return `x` raised to the third power."
    return pow(x, 3)

In [None]:
# NOTE(review): this executor is never shut down in the code shown, and the name `pool`
# is rebound by the `with` blocks further down - confirm it is still needed.
pool = ProcessPoolExecutor(max_workers=4)

In [None]:
concurrent.futures.ProcessPoolExecutor??

In [None]:
# Print the number of cells of every notebook produced by `file_generator`.
for file in file_generator():
    print(len(file['cells']))

90
2
100
28
31
2


In [None]:
# Map the imported `f` over 0..9 with up to 4 worker processes.
# The `with` block shuts the executor down on exit.
with ProcessPoolExecutor(max_workers=4) as pool:
    print(list(pool.map(f, range(10))))

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]


In [None]:
# Tour of the `multiprocessing.Pool` API, using the imported function `f`.
if __name__ == '__main__':
    # start 4 worker processes
    with Pool(processes=4) as pool:

        # blocking map that keeps the input order: prints "[0, 1, 4,..., 81]"
        print(pool.map(f, range(10)))

        # print same numbers in arbitrary order
        for i in pool.imap_unordered(f, range(10)):
            print(i)

        # evaluate "f(10)" asynchronously; `get` waits at most 1 second for the result
        res = pool.apply_async(f, [10])
        print(res.get(timeout=1))             # prints "100"

        # make worker sleep for 10 secs
        # res = pool.apply_async(sleep, [10])
        # print(res.get(timeout=1))             # raises multiprocessing.TimeoutError

    # exiting the 'with'-block has stopped the pool

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
0
1
4
9
16
25
36
49
64
81
100


### Coroutines and generators

https://stackoverflow.com/questions/19302530/python-generator-send-function-purpose

In [None]:
def coroutine():
    "Yield 1 through 9; after each yield, print the value that was sent back in."
    for i in range(1, 10):
        received = yield i
        print(f'From generator {received}')
c = coroutine()
c.send(None) # prime the generator: runs it up to the first `yield`
try:
    # every `send(1)` resumes the generator with the value 1 and returns the next yielded number
    while True: print(f'From user {c.send(1)}')
except StopIteration: pass # raised once the generator's loop is finished

From generator 1
From user 2
From generator 1
From user 3
From generator 1
From user 4
From generator 1
From user 5
From generator 1
From user 6
From generator 1
From user 7
From generator 1
From user 8
From generator 1
From user 9
From generator 1


In [None]:
# Unaffected by send
def double_number(number):
    "Endlessly double `number` and yield it; sent values are ignored, because the `yield` result is never used."
    current = number
    while True:
        current *= 2
        yield current

c = double_number(4)
print(c.send(None)) # first resume has to send `None`
print(next(c))
print(next(c))
# the sent 8 is discarded by the generator, the doubling just continues
print(c.send(8))
print(c.send(8))
print(c.send(8))

8
16
32
64
128
256


In [None]:
# Works with send
def double_number(number):
    "Yield the double of the current value, then continue with whatever value is sent in."
    current = number
    while True:
        current *= 2
        # the result of `yield` is the value passed to `send`
        current = yield current
        
c = double_number(4)
print(c.send(None)) # prime the generator; yields 4 * 2
# each sent value replaces `number` inside the generator and comes back doubled
print(c.send(5)) #10
print(c.send(1500)) #3000
print(c.send(3)) #6

8
10
3000
6


In [None]:
def double_inputs():
    "Alternate between a bare `yield` that receives a value and a `yield` that hands back its double."
    while True:
        received = yield
        doubled = received * 2
        yield doubled

# Each round needs two resumes: `next` advances to the bare `yield` (prints None),
# `send` delivers the input and returns the doubled value.
gen = double_inputs()
print(next(gen))       # run up to the first yield
print(gen.send(10))    # goes into 'x' variable

print(next(gen))       # run up to the next yield
print(gen.send(6))     # goes into 'x' again

print(next(gen))       # run up to the next yield
print(gen.send(94.3))  # goes into 'x' again

None
20
None
12
None
188.6


In [None]:
# @defer.inlineCallbacks
# def doStuff():
#     result = yield takesTwoSeconds()
#     nextResult = yield takesTenSeconds(result * 10)
#     defer.returnValue(nextResult / 10)

In [None]:
# def doStuff():
#     returnDeferred = defer.Deferred()
#     def gotNextResult(nextResult):
#         returnDeferred.callback(nextResult / 10)
#     def gotResult(result):
#         takesTenSeconds(result * 10).addCallback(gotNextResult)
#     takesTwoSeconds().addCallback(gotResult)
#     return returnDeferred

### Combining multithreading, multiprocessing and generators

In [None]:
# TODO(florian): Only look for 0 indent comments?
def iter_comments(src:str, cell_nr:int, pure_comments_only:bool=True, line_limit=None):
    """Detect all comments in a piece of code, excluding those that are a part of a string.

    src: the source code, scanned line by line and character by character.
    cell_nr: number of the cell `src` belongs to. Kept for backward compatibility only:
        the scanner no longer raises, so the value is currently unused.
    pure_comments_only: if True, only comments with nothing but whitespace in front of them
        are reported, and the whole line is yielded. If False, every comment is reported,
        cut off at its `#`.
    line_limit: only the first `line_limit` lines are scanned (`None` scans all of them).

    Yields `(text, (line_index, column))`, both zero-based, with `column` pointing at the `#`.

    Adjacent string literals without whitespace in between (`'a''b'`) are handled as two
    separate strings instead of being rejected with a `SyntaxError`.
    """
    in_lstr = in_sstr = False # inside a triple-quoted / single-quoted string
    count, quote = 1, ''      # length of the current run of quote chars / quote char of the open string
    for i, line in enumerate(src.splitlines()[:line_limit]):
        # `escape` and `prev_c` restart on every line, the string state carries over to the next line.
        is_pure, escape, prev_c = True, False, '\n'
        for j, c in enumerate(line):
            # we can't break as soon as not is_pure, because we have to detect if a multiline string begins
            if is_pure and (not (c.isspace() or c == '#')): is_pure = False
            if (in_sstr or in_lstr):
                if escape: count = 0 # an escaped character never counts towards a closing quote run
                else:
                    if (c == quote):
                        count = ((count + 1) if (c == prev_c) else 1)
                        if in_sstr: in_sstr = False
                        elif (in_lstr and (count == 3)): count, in_lstr = 0, False
                escape = False if escape else (c == '\\')
            else:
                if (c == '#'):
                    if (pure_comments_only and is_pure): yield (line, (i, j))
                    elif (not pure_comments_only):       yield (line[j:], (i, j))
                    break # the rest of the line belongs to the comment
                elif c == "'" or c == '"':
                    count = ((count + 1) if (c == prev_c) else 1)
                    # 3 is only reached right after an empty string ('' followed by a third quote),
                    # which is how the start of a triple-quoted string shows up here.
                    if count == 3: count, in_lstr = 0, True
                    else:
                        # count == 1: ordinary opening quote.
                        # count == 2: quote directly behind the closing quote of a non-empty string,
                        # i.e. an adjacent literal like 'a''b'. It opens a new string as well.
                        count, in_sstr = 1, True
                    quote = c
            prev_c = c

In [None]:
class Cell:
    "One notebook cell plus the state that is collected for it during export."
    def __init__(self, cell:dict, cell_nr:int):
        # data taken straight from the notebook cell
        self.cell_nr = cell_nr
        self.source_code = cell['source']
        self.cell_type = cell['cell_type']
        # file state
        self.default_export = None
        # cell state
        self.names = set()
        self._comments = None # set by `iter_ruptor` once a scan has run to completion
        self.export = False
        self.internal = False

    def iter_ruptor(self, gen):
        "Pass the items of `gen` through and, once `gen` is exhausted, store all of them in `self._comments`."
        seen = []
        for item in gen:
            seen.append(item)
            yield item
        self._comments = seen

    def __iter__(self):
        "Iterate over the cell's comments: the cached list if there is one, a fresh `iter_comments` scan otherwise."
        if self._comments is not None: return iter(self._comments)
        return self.iter_ruptor(iter_comments(self.source_code, self.cell_nr))

    def stage_one(self):
        # NOTE(review): `decode_comment` is not defined on this class in the code shown - confirm where it comes from.
        for comment in self:
            if self.decode_comment(comment) is None:
                pass

    def __repr__(self):
        return f'{self.cell_type}, {self.export}, {self.internal}, {self.default_export},\n{self.source_code}'

In [None]:
# Wrap the first cell of the notebook in a `Cell`, with cell number 1.
cell = Cell(read_nb('03_export_v3.ipynb')['cells'][0], 1)

In [None]:
# `Cell.__iter__` returns a plain list iterator when comments are already cached, a generator otherwise.
iter(cell)

<list_iterator at 0x1bf7ff2b688>

In [None]:
# Print every comment found in the cell as `(text, (line, column))`.
for c in cell:
    print(c)

('# export', (0, 0))


In [None]:
# Collect all comments of the cell into a list.
list(iter(cell))

[]

In [None]:
class E:
    "Sketch of a two-stage export worker for the cells of one notebook file."
    def __init__(self, file_path, cells):
        self.file_path = file_path
        self.cells = cells
        self.default_export = None
        self.meta_cells = {}   # cell index -> cell
        self.export_cells = {} # cell index -> (cell, is_internal)

    def stage_one(self):
        "Sort the cells into `meta_cells` and `export_cells`, based on what `find_builtins` reports."
        for idx, cell in enumerate(self.cells):
            is_meta, is_export, is_internal, comments = find_builtins(cell)
            if is_meta:
                self.meta_cells[idx] = cell
            if is_export:
                self.export_cells[idx] = (cell, is_internal)

    def stage_two(self, metas, default_exports):
        "Merge in the externally supplied `metas` / `default_exports`, then run the meta cells if there are any."
        # NOTE(review): `merge_metas`, `merge_default_exports` and `run_meta` are not defined on this class in the code shown.
        self.meta_cells = self.merge_metas(self.meta_cells, metas)
        self.default_export = self.merge_default_exports(self.default_export, default_exports)
        if self.meta_cells:
            self.run_meta()

    def run(self):
        "Generator: yields `(meta_cells, default_export)` after stage one and expects a `(metas, default_exports)` pair to be sent back."
        self.stage_one()
        stage_one_result = (self.meta_cells, self.default_export)
        metas, default_exports = yield stage_one_result
        self.stage_two(metas, default_exports)

    def main(self):
        "Placeholder: communicate with main process."

In [None]:
def partial_cells(fname, cells):
    """Drive both stages of `E.run` for the given cells.

    `E.run` is a generator method: it yields `(meta_cells, default_export)` after stage one and
    expects a `(metas, default_exports)` tuple to be sent back before it runs stage two.

    Returns the next value `E.run` yields, or its return value once the generator finishes
    (currently always the latter, which is `None`).
    """
    e = E(fname, cells)
    runner = e.run() # an `E` instance is not an iterator itself, its `run` generator is
    metas, default_exports = next(runner)
    # communicate with main process
    # `send` accepts exactly one value, so both results go back as a single tuple.
    try: return runner.send((metas, default_exports))
    except StopIteration as finished:
        # `run` has no second `yield`; its return value travels on the exception.
        return finished.value
    

#### per file

In [None]:
def export_file(file):
    "Design sketch: split the cells of `file` into chunks, process each chunk, exchange shared data, collect the results."
    # NOTE(review): `chunkify` is not defined in the code shown.
    # NOTE(review): `partial_cells` takes `(fname, cells)`, but only one argument is passed here.
    processes = []
    for chunk in chunkify(file['cells']):
        processes.append(partial_cells(chunk))
    # NOTE(review): `dist_data` is a plain dict, which has no `set_data` method - presumably a placeholder for a custom type.
    dist_data = {}
    for p in processes:
        dist_data.set_data(p.get_dist())
    # hand the merged data back to every worker before collecting the final results
    for p in processes: p.push_dist(dist_data)
    return [p.complete() for p in processes]

In [None]:
# NOTE(review): unfinished sketch - `Cell.__init__` also requires `cell_nr`,
# and `cells` is not defined at notebook top level in the code shown.
for cell in cells:
    cell = Cell(cell)
    

#### per run

In [None]:
def do_file(file):
    "Print how many cells the notebook `file` contains."
    cell_count = len(file['cells'])
    print(cell_count)

In [None]:
# Feed every notebook from `file_generator` to `do_file`.
for file in file_generator(): do_file(file)

90
2
100
48
31
2
