In [None]:
# export
from collections import namedtuple, defaultdict
import os
import re
from nbdev_rewrite.imports import *

from inspect import signature

import ast
from ast import iter_fields, AST
import _ast

## The normal stuff

In [None]:
def init_config(lib_name='nbdev_rewrite', user='flpeters', nbs_path='.'):
    "create a config file, unless one is already present"
    # an existing config file is left untouched
    if Config().config_file.exists(): return
    create_config(lib_name, user, nbs_path=nbs_path)
init_config()

In [None]:
def init_lib():
    "create the module folder and its `__init__.py`, unless both are already there"
    cfg = Config()
    init_py = cfg.lib_path/'__init__.py'
    # nothing to do: the module *should* already exist
    if cfg.lib_path.exists() and init_py.exists(): return
    cfg.lib_path.mkdir(parents=True, exist_ok=True)
    with init_py.open('w') as f: f.write(f'__version__ = "{cfg.version}"\n')
init_lib()

In [None]:
# directories that `crawl_nbs` skips when it descends into subdirectories; evaluated once, when this cell runs
_reserved_dirs = (Config().lib_path, Config().nbs_path, Config().doc_path)
def crawl_nbs(path:Path=None, recurse:bool=True) -> list:
    """finds a list of ipynb files to convert

    `path` may be a notebook file, a directory, or a list/tuple of those; plain strings are
    accepted as well. `None` falls back to `Config().nbs_path`.
    A file is returned as-is. In a directory, entries whose name starts with '.' or '_' are
    ignored and only '*.ipynb' files are collected. With `recurse` set, subdirectories are
    searched too, except the ones listed in `_reserved_dirs`.
    Returns a flat list of `Path` objects.
    """
    if path is None: path = Config().nbs_path
    if isinstance(path, (list, tuple)):
        files = []
        # hand `recurse` down, so that `recurse=False` also holds for every entry of the list
        for p in path: files.extend(crawl_nbs(p, recurse))
        return files
    path = Path(path) # accept plain strings, like `read_nb` and `_notebook2script` do
    if path.is_file(): return [path]
    files = []
    for p in path.iterdir():
        f = p.name
        if f.startswith(('.', '_')): continue # hidden / private entries
        if p.is_file():
            if f.endswith('.ipynb'): files.append(p)
        elif recurse and p.is_dir():
            if p not in _reserved_dirs: files.extend(crawl_nbs(p, recurse))
    return files
crawl_nbs()

[WindowsPath('D:/Projects/GitHub/nbdev_rewrite/00_export.ipynb'),
 WindowsPath('D:/Projects/GitHub/nbdev_rewrite/01_helpers.ipynb'),
 WindowsPath('D:/Projects/GitHub/nbdev_rewrite/02_export_v2.ipynb'),
 WindowsPath('D:/Projects/GitHub/nbdev_rewrite/03_export_v3.ipynb'),
 WindowsPath('D:/Projects/GitHub/nbdev_rewrite/99_index.ipynb'),
 WindowsPath('D:/Projects/GitHub/nbdev_rewrite/sub/lalalala.ipynb')]

In [None]:
def read_nb(fname:Path) -> nbformat.notebooknode.NotebookNode:
    "Load the notebook stored at `fname` (utf8 encoded) as a version 4 notebook node."
    raw = Path(fname).read_text(encoding='utf8')
    return nbformat.reads(raw, as_version=4)
len(read_nb('03_export_v3.ipynb')['cells'])

24

In [None]:
def module2path(module:str) -> str:
    "swaps the '.' separators of a python module name for the os specific path separator"
    return module.replace('.', os.path.sep)
module2path('test.abc')

'test\\abc'

In [None]:
# export
def _notebook2script(fname, silent=False):
    """Work-in-progress conversion of the single notebook `fname`.

    Reads the notebook and walks over its cells; the per-cell processing is not implemented
    yet, so the function has no explicit return value (callers receive `None`).
    Pass `silent=True` to suppress printing the notebook's file name.
    """
    fname = Path(fname)
    if not silent: print(fname.name)
    nb = read_nb(fname)
    cells = nb['cells']
    C = Config()
    # `cell_spacing` (default 1, negative values clamped to 0) plus one newline
    # presumably the separator between exported cells -- not used yet, TODO confirm
    sep = '\n' * (max(int(C.get('cell_spacing', 1)), 0) + 1)
    for cell in cells:
        # scan for
        # default_exp
        # export
        # hide
        # put all the stuff in datastructure, which should be thread safe
        pass

In [None]:
def notebook2script(fname=None, silent=False, to_dict=False):
    "run `_notebook2script` on every notebook that `crawl_nbs(fname)` finds; `to_dict` is currently unused"
    # make sure the target module directory exists
    init_lib()
    exports = [_notebook2script(nb_file, silent) for nb_file in crawl_nbs(fname)]
    # merge_exports(exports)
    # Plan:
    # - if fname is a file, convert only that file, if possible.
    # - if it's None, use the Config() directory
    # - if it's a directory,
    #     crawl the source directory recursively to find all files that should be converted
    # - create a thread/process pool for processing all files in parallel
    # - execute compilation on each of the files, resulting in a dataformat representing the converted file.
    # - merge all files, based on cross-exporting stored in the dataformat
    # TODO: handle cross-exporting if the targeted file already exists, but wasn't compiled from scratch
    #       probably should force a recompile of that file as well
    # - write files to disk
    return exports
notebook2script()

00_export.ipynb
01_helpers.ipynb
02_export_v2.ipynb
03_export_v3.ipynb
99_index.ipynb
lalalala.ipynb


[None, None, None, None, None, None]

## Develop Parallelism

### Prefetcher [working]

In [None]:
from parallel import BackgroundGenerator, prefetch

In [None]:
@prefetch(max_prefetch=4)
def file_generator():
    "yield every notebook found by `crawl_nbs()`, loaded with `read_nb`"
    yield from map(read_nb, crawl_nbs())

In [None]:
import time

In [None]:
# Accumulate only the time spent waiting for `file_generator` to hand over the next notebook;
# the simulated work inside the loop body is excluded from the total.
# `time.perf_counter()` is used because it is the clock meant for measuring short intervals.
tt = 0
t0 = time.perf_counter()
for x in file_generator():
    t1 = time.perf_counter()
    tt += t1 - t0 # time blocked on the generator
    time.sleep(.5) # work
    print(len(x['cells']))
    t0 = time.perf_counter() # restart the clock once the work is done
print(round(tt, 6))

90
2
100
24
31
2
0.022999


### Basic Threading

In [None]:
import threading 
import time
  
def print_hello():
    "print 'Hello' four times, sleeping 0.5s before each print"
    for _ in range(4): time.sleep(0.5); print("Hello")
        
def print_hi():
    "print 'Hi' four times, sleeping 0.7s before each print"
    for _ in range(4): time.sleep(0.7); print("Hi")

# run both printers at the same time, each on its own thread
t1, t2 = threading.Thread(target=print_hello), threading.Thread(target=print_hi)
t1.start(); t2.start()

Hello
Hi
Hello
Hi
Hello
Hello
Hi
Hi


### multiprocessing

For multiprocessing to work, the function that is supposed to be executed has to be importable, i.e. defined in a .py file.

In [None]:
# scan for comments in all cells
# check for `export`, `hide`, and `meta` comments
# if any `meta`:
#     execute `meta`
#     if control inversion:
#         pass for now
#     check for potential new comments due to `meta` execution
# parse or discard all remaining comments
# execute commands
# aggregate results back in main process
# return

In [None]:
from parallel import f

In [None]:
list(map(f, range(10)))

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [None]:
from multiprocessing import Pool
from time import sleep

In [None]:
def a(x):
    "raise `x` to the third power"
    return pow(x, 3)

In [None]:
# executor limited to 4 workers, kept in a global so the following cell can use it
# NOTE(review): no `shutdown()` call for this pool is visible in the cells shown here -- confirm it gets cleaned up
pool = ProcessPoolExecutor(max_workers=4)

In [None]:
# NOTE(review): `submit` is called without a callable; the output recorded for this cell is an AttributeError.
# Presumably something like `pool.submit(f, 3)` was intended -- confirm.
pool.submit()

AttributeError: 'ProcessPoolExecutor' object has no attribute 'super'

In [None]:
# map `f` over 0..9 on an executor with 4 workers; the executor is cleaned up when the block exits
with ProcessPoolExecutor(max_workers=4) as pool:
    results = list(pool.map(f, range(10)))
    print(results)

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]


In [None]:
if __name__ == '__main__':
    # run everything on a pool of 4 worker processes
    with Pool(processes=4) as pool:

        # results in input order: prints "[0, 1, 4,..., 81]"
        squares = pool.map(f, range(10))
        print(squares)

        # the same numbers again, printed in arbitrary order
        for value in pool.imap_unordered(f, range(10)): print(value)

        # evaluate "f(10)" asynchronously and wait at most one second for the result
        res = pool.apply_async(f, [10])
        print(res.get(timeout=1))             # prints "100"

        # to provoke a timeout, make a worker sleep for 10 secs:
        # res = pool.apply_async(sleep, [10])
        # print(res.get(timeout=1))             # raises multiprocessing.TimeoutError

    # leaving the 'with'-block has stopped the pool

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
0
1
4
9
16
25
36
49
64
81
100
