# Parsing the notebook

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
from IPython.display import display, Markdown, HTML
from pprint import pprint

## Loading a notebook

In [None]:
#|default_exp nbx.nbparser
#|export
from typing import Union, List, Tuple
from cgnai.utils import listmap
import ipynbname
import json
from cgnai.utils import Bunch
from pathlib import Path

In [None]:
#|export
def load_nb(fname):
    """Load a notebook and return it as a `Bunch`.

    `fname` is first tried as the JSON text of a notebook; if it cannot be
    decoded as JSON (or is not a string at all, e.g. a path object) it is
    treated as the path of a UTF-8 encoded notebook file.

    The resulting bunch carries all top-level notebook keys plus an extra
    key `fname` holding `str(fname)`.
    """
    try:
        nbdict = json.loads(fname)
    except (TypeError, ValueError):
        # TypeError: `fname` is not str/bytes (e.g. a path object).
        # ValueError: `fname` is a string but not valid JSON (JSONDecodeError
        # is a subclass), so interpret it as a file name instead.
        with open(fname, 'r', encoding="utf-8") as f:
            nbdict = json.load(f)

    nbdict["fname"] = str(fname)
    return Bunch(**nbdict)

In [None]:
%%capture
# Demo: render every cell of the example notebook as HTML, with a heading per
# cell and a red return-arrow marking each newline inside the cell source.
nb = load_nb("nbx_example/test_notebook.ipynb")
for i,cell in enumerate(nb.cells):
    # NOTE(review): `h` and `n` are computed but not used below.
    h = f"|   cell {i}   |"
    n = len(h) - 2    
    display(HTML(f"<h3 style='color: black'>Cell - {i} - {cell['cell_type']}</h3>"))
    # join the source lines with <br> and wrap them in a <code> element
    s = "<br>".join(cell["source"])
    s = f"<code>{s}</code>"
    display(HTML(s.replace("\n", "<span style='color:red'> &crarr;</span>")))

# Parsing a line

In [None]:
#|export 
def add_fail(p):
    """
    Wrap the parser `p` so that callers can choose what is returned
    when the parse fails.

    The wrapper accepts an extra keyword-only argument `fail` (default
    `None`); all other arguments are forwarded to `p`. Whenever `p`
    returns `None`, the wrapper returns `fail` instead.
    """
    def q(*args, fail=None, **kwargs):
        outcome = p(*args, **kwargs)
        return fail if outcome is None else outcome
    return q

## Parsing Hashtags (`#nbx`)

In [None]:
#|export
import re
# Optional leading whitespace, `#` immediately followed by the tag name
# (letters and underscores), then the remainder of the line.
tag_rx = re.compile(r"^\s*#([a-zA-Z_]+)(.*)$")

def parse_any_tag(line):
    """
    Split a leading tag (e.g. `#my_tag`) off the line.

    Returns the tuple `(tag_name, rest_of_line)` or `None` if the line
    does not start with a tag.
    """
    match = tag_rx.match(line)
    return None if match is None else match.groups()

In [None]:
parse_any_tag("#nbx something else")

In [None]:
#|export
def parse_tag(name):
    """
    Return a parser that splits off the tag `name` only.

    The returned parser yields `(name, rest_of_line)` when the line starts
    with exactly that tag and `None` otherwise.
    """

    def p(line):
        parsed = parse_any_tag(line)
        if parsed is not None and parsed[0] == name:
            return parsed
        return None
    return p

In [None]:
# The tag is recognised no matter what follows it on the line ...
assert parse_tag("nbx")("#nbx")[0]    == "nbx"
assert parse_tag("nbx")("#nbx ")[0]   == "nbx"
assert parse_tag("nbx")("#nbx: ")[0]  == "nbx"
assert parse_tag("nbx")("#nbx : ")[0] == "nbx"
# ... whereas a line carrying a different tag is rejected.
assert parse_tag("nbx")("#xarg  something else ") == None

In [None]:
#|export
def result_or_none(y:"parser return"):
    """
    Extract the result from a parser's return value.

    Parsers return either `None` (failed parse) or a sequence whose first
    entry is the result; this returns that first entry, or `None`.
    """
    return y[0] if y is not None else None

In [None]:
# `result_or_none` yields the tag name for a tagged line ...
assert result_or_none(parse_any_tag("#nbx")) == "nbx"
assert result_or_none(parse_any_tag("#nbx ")) == "nbx"
# ... and None when `#` is not directly followed by the tag name.
assert result_or_none(parse_any_tag("# nbx something else ")) == None

## Parsing `#nbx_meta`

In [None]:
#|export
import re
# One or more dashes followed by the flag name (letters, `_` and `-`).
flag_rx = re.compile(r"(?:[\-]+)(?P<name>[a-zA-Z_\-]+)")

def flagname(line):
    """
    Return the name of a flag such as `--flagname` (without the leading
    dashes), or `None` if the text does not start with a flag.
    """
    match = flag_rx.match(line)
    if match is None:
        return None
    return match.groupdict()["name"]

def is_flag(line):
    """Tell whether the text starts with a flag (see `flagname`)."""
    name = flagname(line)
    return name is not None

def pairs(arr):
    """
    Group consecutive elements into 2-tuples:
    `[a, b, c, d]` becomes `[(a, b), (c, d)]`.

    The length of `arr` must be even (checked with an assertion).
    """
    n = len(arr)
    assert n%2==0
    return [(arr[k], arr[k+1]) for k in range(0, n, 2)]

In [None]:
is_flag("-flag_name"), flagname("--some-name"), flagname("-some_other_name")

In [None]:
#|export
import shlex
def parse_nbx_meta(line):
    """
    Parse an `#nbx_meta` line of the form `#nbx_meta --flag value ...`.

    Everything after the tag is split shell-style into `flag value` pairs.
    Returns `(d, "")`, where `d` maps each flag name (without the leading
    dashes) to its value, or `None` when the parse fails, i.e. when

    - the line is not tagged `#nbx_meta`,
    - the arguments cannot be split (e.g. unbalanced quotes),
    - the number of arguments is odd, or
    - an argument in flag position is not a flag.
    """
    tagged = parse_tag("nbx_meta")(line)
    if tagged is None: return None
    _, r = tagged

    try:
        args = shlex.split(r)
    except ValueError:
        # shlex raises ValueError e.g. for an unclosed quotation
        return None
    if len(args) % 2 != 0: return None

    d = {}
    # even positions hold the flags, odd positions their values
    for k, v in zip(args[0::2], args[1::2]):
        name = flagname(k)
        if name is None: return None
        d[name] = v

    return (d, "")


In [None]:
# Two `flag value` pairs -> dict with the keys `fname` and `s`
print(parse_nbx_meta("#nbx_meta  --fname experiment.py  -s ./src  "))
# A single argument (odd count) -> failed parse, i.e. None
print(parse_nbx_meta("#nbx_meta  experiment.py  "))
# Single and double dashes are both accepted -> keys `n` and `src`
print(parse_nbx_meta("#nbx_meta -n  experiment.py  --src src "))

## Parsing `xarg`

In [None]:
#|export
# Matches an xarg line of the form `name = val [; sweep] [# help]`:
#   name  - everything before the first `=`
#   val   - everything after it up to the first `;` or `#`
#   sweep - optional, the text after `;` up to a `#`
#   help  - optional, the text after `#`
# The captured groups still carry their surrounding whitespace.
xarg_rx = re.compile(r"""
^(?P<name>[^=]+)=(?P<val>[^;#]+)
(?:(?:;(?P<sweep>[^\#]*))?(?:\#(?P<help>.*))?)?
$""", re.VERBOSE)

def strip(s):
    """Return `s` with leading and trailing whitespace removed."""
    stripped = s.strip()
    return stripped

def strip_or_none(s):
    """Like `s.strip()`, but passes `None` through unchanged."""
    return s.strip() if s is not None else None

def strip_dict_vals(d):
    """
    Strip every value of `d` in place (`None` values are kept as they are)
    and return the very same dict.
    """
    for key in list(d):
        d[key] = strip_or_none(d[key])
    return d
    
def parse_xarg(line):
    """
    Parses the line below an `xarg` tag and
    returns a dict with keys `[name,val,type,help,sweep]`.

    The expected shape of the line is `name = val [; sweep] [# help]`.
    On success the return value is `(d, "")`, where all entries of `d` are
    stripped strings; `type` is the type name of the evaluated `val`, and
    `sweep` / `help` are `None` when missing or empty.
    Returns `None` if the line does not have the expected shape.

    Any exception raised while evaluating `val` propagates to the caller.
    """

    m = xarg_rx.match(line)
    if m is None: return None

    # groupdict() already hands out a fresh dict, no extra copy needed
    d = strip_dict_vals(m.groupdict())

    # SECURITY NOTE: the value is evaluated only to determine its type, but
    # `eval` executes arbitrary code taken from the notebook source.
    # Only parse notebooks you trust.
    v = eval(d['val'])
    d['type'] = type(v).__name__

    # normalise empty optional parts to None
    if d['help']  == "": d['help']  = None
    if d['sweep'] == "": d['sweep'] = None

    return (d, "")


In [None]:
# Each entry pairs an input line with the expected outcome: either None
# (failed parse) or the expected `(name, val, sweep, help)` tuple.
test_lines = [
    ("x + 2 ; [] ",      None),
    ("x = 2  ",      ("x","2",None,None)),
    ("x = 2  ;  ",   ("x","2",None,None)),
    ("x = 2  ;  # ", ("x","2",None,None)),
    ("x = 'hello' ; range(2)   # some description  ", ("x", "'hello'", "range(2)", "some description")),
    ("x = 2 ;  [f(1),f(a),6,8]  ",                    ("x", "2", "[f(1),f(a),6,8]", None)),
    ("x = 'hello'    ; # some description  ",         ("x", "'hello'", None, "some description")),
    ("x = 'hello'   # some description  ",            ("x", "'hello'", None, "some description")),
]   

for line, expected in test_lines:
    r = result_or_none(parse_xarg(line))
    print(r)
    # a failed parse has no fields to compare
    if r is None: 
        assert r == expected
        continue
    assert (r["name"], r["val"], r["sweep"], r["help"]) == expected 

In [None]:
print(parse_xarg("x = 2 ;  [f(1),f(a),6,8]  "))
print(parse_xarg("x = 'hello' ; range(2)   # some description  "))
print(parse_xarg("x = 'hello' ; # some description  "))

## Parsing a cell

In [None]:
#|export
def parse_default(line):
    """
    Fallback line parser: the result is the line without trailing
    whitespace, returned as `(result, "")`.
    """
    result = line.rstrip()
    return result, ""

In [None]:
#|export 
def consume_line(tag, p = parse_default):
    """
    Build a parser that consumes exactly one line.

    The returned parser takes the accumulator `a` and the remaining source
    lines `src`. It parses the first line with `p`, appends
    `(tag, line, result)` to `a` (the result is `None` if `p` fails) and
    returns `a` together with the remaining lines. For an empty `src` it
    returns `(a, [])` without appending anything.
    """

    def q(a, src):
        if len(src) == 0:
            return a, []

        head, rest = src[0], src[1:]
        a.append((tag, head, result_or_none(p(head))))
        return a, rest

    return q

In [None]:
#|export 
def consume_block(tag, p = parse_default):
    """
    Returns a parser that consumes a block of lines
    and parses them, until it hits an empty line,
    a comment/tag, or the end.

    The returned parser takes the accumulator `a` and the remaining source
    lines `src`. For every consumed line it appends `(tag, line, result)`
    to `a`, where the result is `None` if `p` fails on that line. It returns
    `a` together with the lines that were not consumed; the line that
    stopped the block is the first of them.
    """

    def parse(a, src):
        # Iterative on purpose: one recursion level per line would hit
        # Python's recursion limit on long blocks.
        consumed = 0
        for line in src:
            stripped = line.strip()

            # stopping conditions: blank line or comment/tag
            if stripped == "" or stripped.startswith("#"): break

            # else consume line
            a.append((tag, line, result_or_none(p(line))))
            consumed += 1

        return a, src[consumed:]

    return parse



In [None]:
#|export 
def ignore_first(p):
    """
    Wrap the parser `p` so that it skips the first line of the source
    and starts parsing at the second one.

    If there is no second line, the wrapper returns `(a, [])`
    without calling `p`.
    """
    def q(a, src):
        if len(src) > 1:
            return p(a, src[1:])
        return a, []
    return q


In [None]:
#|export 
def consume_line_below(tag, p = parse_default):
    """
    Build a parser that skips the first line (the tag line) and consumes
    the single line below it, see `consume_line` and `ignore_first`.
    """
    line_parser = consume_line(tag, p=p)
    return ignore_first(line_parser)

def consume_block_below(tag, p = parse_default):
    """
    Build a parser that skips the first line (the tag line) and consumes
    the block of lines below it, see `consume_block` and `ignore_first`.
    """
    block_parser = consume_block(tag, p=p)
    return ignore_first(block_parser)

In [None]:
#|export 
def parse_src(a:list, src:list, parser_dict: dict, parse_default=parse_default):
    """
    Parse the source lines of a cell.

    Lines starting with a tag that has an entry in `parser_dict` are handed
    (together with all lines after them) to that parser, which appends its
    results to `a` and returns the lines it did not consume. Every other
    line is parsed with `parse_default` and appended as
    `(None, line, result)`.

    Returns `(a, [])` once all lines are consumed.

    Raises `RuntimeError` if a parser from `parser_dict` does not consume
    any line, since parsing could never finish in that case.
    """
    # Iterative on purpose: one recursion level per line would hit Python's
    # recursion limit on long cells. The loop also keeps using the
    # caller's `parse_default` for every line, not only for the first one.
    rest = src
    while len(rest) > 0:
        line = rest[0]
        tag = result_or_none(parse_any_tag(line))

        if tag is None or tag not in parser_dict:
            b, _ = parse_default(line)
            a.append((None, line, b))
            rest = rest[1:]
        else:
            remaining_before = len(rest)
            a, rest = parser_dict[tag](a, rest)
            if len(rest) >= remaining_before:
                raise RuntimeError(
                    f"The parser for tag `{tag}` did not consume any line")

    return a, []

In [None]:
#|export 
# Maps a tag name (as found at the start of a line) to the parser handling it.
# The first argument of each `consume_*` factory is the tag under which the
# parsed lines are recorded; the `*_below` variants skip the tag line itself.
PARSER_DICT = {
    'nbx'       : consume_line('nbx'),
    'nbx_meta'  : consume_line('nbx_meta', parse_nbx_meta),
    'nbx_return': consume_line_below("nbx_return"), 
    # `#xarg` and `#xargs` are both recorded under the tag 'xarg'
    'xarg' : consume_block_below('xarg', parse_xarg),
    'xargs': consume_block_below('xarg', parse_xarg),
    # NOTE(review): unlike xarg/xargs, `#ximps` is recorded under its own
    # tag 'ximps' rather than 'ximp' - confirm that this is intended.
    'ximp' : consume_block_below('ximp'),
    'ximps': consume_block_below('ximps')}

def parse_cell(cell, parser_dict=PARSER_DICT):
    """
    Parse the source of a single notebook cell.

    Trailing whitespace is removed from every source line before parsing.
    Returns the list of `(tag, line, result)` triples produced by
    `parse_src` with the given `parser_dict`.
    """
    source = cell['source']
    # The notebook format allows `source` to be one multi-line string
    # instead of a list of lines; iterating it would yield characters.
    if isinstance(source, str):
        source = source.splitlines()

    src = listmap(lambda line: line.rstrip(), source)
    a, _ = parse_src([], src, parser_dict)
    return a

In [None]:
# Demo: parse every cell of the example notebook and print one row per
# parsed line: the tag, the (truncated) source line and the parse result.
for i,cell in enumerate(nb.cells):    
    print(f"CELL {i: >2.0f} PARSED: ( type | line | result )")
    print(f"----------------------------------------")

    a = parse_cell(cell)    
    for tag, line, res in a:
        t = f"[{tag}]"
        # NOTE(review): the ellipsis is added for lines longer than 25
        # characters, while the line is cut after 35 - confirm the limits.
        dots = " ...\"" if len(line) > 25 else "\""
        print(f"{t:<15s} | \"{line[:35]+dots:<40s} | {res}")
    print("\n")

# Parsing the whole thing

In [None]:
#|export
def is_none(x):
    """Return True exactly when `x` is `None`."""
    return x is None
def is_not_none(x):
    """Return True exactly when `x` is anything but `None`."""
    return not (x is None)
def is_tagged(name):
    """Return a predicate telling whether a line starts with the tag `name`."""
    def starts_with_tag(line):
        parsed = parse_tag(name)(line)
        return is_not_none(parsed)
    return starts_with_tag

# Predicate for lines starting with the `#nbx` tag
is_nbx = is_tagged("nbx")

In [None]:
# Leading whitespace before `#` is fine, but the tag name has to follow
# the `#` directly.
assert is_nbx("#nbx " ) 
assert is_nbx("# nbx" ) == False
assert is_nbx(" #nbx ") 
assert is_nbx("  #nbx") 

In [None]:
#|export 
def is_nbx_cell(cell):
    """
    Tell whether `cell` is a non-empty code cell whose
    first source line starts with the `#nbx` tag.
    """
    if cell['cell_type'] != 'code': return False

    source = cell['source']
    if not source: return False

    # The notebook format allows `source` to be one multi-line string
    # instead of a list of lines; indexing it would yield one character.
    if isinstance(source, str):
        line0 = source.splitlines()[0]
    else:
        line0 = source[0]
    return is_nbx(line0)

In [None]:
#|export
# Optional leading whitespace followed by `%`, `%%` or `!`.
magic_shell_rx =  re.compile(r"^\s*%{1,2}|^\s*!")

def is_magic_or_shell(line):
    """
    Tell whether the line starts with a jupyter magic (`%`, `%%`)
    or a shell command (`!`), ignoring leading whitespace.
    """
    return magic_shell_rx.match(line) is not None

In [None]:
# Line magics, cell magics and shell commands are all detected
assert is_magic_or_shell("%pwd ")
assert is_magic_or_shell("%%capture ")
assert is_magic_or_shell("!ls")

In [None]:
#|export
from collections import defaultdict

def move_to_front(arr, i):
    """
    Return a new list in which the element at index `i` comes first,
    followed by the remaining elements in their original order.
    The list `arr` itself is left untouched.
    """
    head = arr[i]
    before, after = arr[:i], arr[i+1:]
    return [head] + before + after
    
XARG_MISSING = """
        
#xargs
xtask = 0
xdir  = './'

"""

def parse_nb(nb, parser_dict=PARSER_DICT):
    """
    Parses a notebook bunch.

    All `#nbx` code cells of `nb` are parsed with `parser_dict` and the
    results are grouped by tag (untagged lines go to `func_body`).
    The returned `Bunch` additionally holds

    - `xarg`: the parsed x-args, with `xtask` first and `xdir` second,
    - `sweep` / `non_sweep`: the x-args with / without a sweep,
    - `nbx_meta`: the first parsed `#nbx_meta` dict (or an empty one),
      extended by the key `nbname` holding `nb["fname"]`,
    - `nbx_return`: the last line recorded below an `#nbx_return` tag
      (only if there is one).

    Raises `ValueError` if an x-arg or `#nbx_meta` line cannot be parsed
    and `NameError` if the x-args `xdir` or `xtask` are missing.
    """

    # Parse all of the `nbx` cells
    A = defaultdict(list)
    for cell in filter(is_nbx_cell, nb.cells):
        # use the caller's parser table rather than always PARSER_DICT
        for t, line, b in parse_cell(cell, parser_dict=parser_dict):
            # These results are dereferenced below; fail here with the
            # offending line instead of an obscure TypeError later on.
            if b is None and t in ("xarg", "nbx_meta"):
                raise ValueError(f"Could not parse `#{t}` line: {line!r}")
            if t is None: t = 'func_body'
            A[t].append(b)

    # Make sure xtask and xdir are present
    xargs = A["xarg"]
    argdict = {x['name']: i for i, x in enumerate(xargs)}
    if "xdir" not in argdict or "xtask" not in argdict:
        raise NameError("YOU NEED TO ADD X-ARGS `xdir` and `xtask`(see below):..."+XARG_MISSING)

    # Move `xtask` and `xdir` (in that order) to the front. The result has
    # to be stored: `move_to_front` returns a new list, and the second
    # index would be stale after the first move anyway.
    front = (argdict["xtask"], argdict["xdir"])
    A["xarg"] = ([xargs[i] for i in front]
                 + [x for i, x in enumerate(xargs) if i not in front])

    # Parameter sweeps
    A["sweep"]     = [x for x in A["xarg"] if x["sweep"] is not None]
    A["non_sweep"] = [x for x in A["xarg"] if x["sweep"] is None]

    # Meta data
    meta = A["nbx_meta"][0] if "nbx_meta" in A else {}
    meta["nbname"] = nb["fname"]
    A["nbx_meta"] = meta

    # Set the return value
    if "nbx_return" in A:
        A["nbx_return"] = A["nbx_return"][-1]

    return Bunch(**A)

In [None]:
# Demo: load and parse the example notebook, then list the available keys
nb  = load_nb("nbx_example/test_notebook.ipynb")
pnb = parse_nb(nb)
print(f"Keys NB:\n\t{nb.keys()}")
print(f"Keys Parsed NB:\n\t{pnb.keys()}")
print(f"Keys nbx_meta:\n\t{pnb.nbx_meta.keys()}")

# Creating the nbx bundle

In [None]:
#|export
from pathlib import PurePosixPath as Path
import pkg_resources
import importlib
from cgnai.utils import cgnai_lib
from cgnai.nbx.templ import *
import os

def add_if_necessary(d, k, v):
    """Set `d[k] = v` unless `d` already contains the key `k`."""
    if k in d:
        return
    d[k] = v
        
# Directory holding the nbx templates, located below `cgnai_lib()`
# (presumably the root of the installed `cgnai` package - TODO confirm).
TEMPLATE_PATH = cgnai_lib()/"nbx/templates/"
# Bare expression: displays the path when the notebook cell is run
TEMPLATE_PATH 

In [None]:
#|export 
def create_script(fname, tpl, vdict=None):
    """
    Create script from template and value dict.

    `tpl` is the name of a template inside `TEMPLATE_PATH`, `fname` the file
    to create and `vdict` the values handed to the template (defaults to an
    empty dict). Returns whatever `create_file_from_template` returns.
    """
    # A fresh dict per call: a `{}` default would be shared across calls.
    if vdict is None: vdict = {}
    return create_file_from_template(TEMPLATE_PATH/tpl, fname, vdict)

## `experiment.py`

In [None]:
#|export 
def create_experiment(pnb, fname="experiment.py", tpl="experiment.tpl"):
    """
    Create the experiment script `fname` from the template `tpl`, using
    the parsed notebook `pnb` as the template's value dict.
    """
    return create_script(fname, tpl, vdict=pnb)

## `confspace.json`

In [None]:
#|export
from cgnai.fileio import dump, load

def create_confspace(pnb, fname="confspace.json"):
    """
    Build the configuration space of a parsed notebook and dump it to `fname`.

    The returned (and dumped) dict has three entries:

    - `info`: `{"nbname": ...}`, taken from the notebook's `nbx_meta`,
    - `default_src`: maps every x-arg name to the source of its value,
    - `src`: maps every x-arg name to the source of its sweep; an x-arg
      without a sweep gets the one-element list `[val]`.
    """
    meta = {"nbname": str(pnb["nbx_meta"]["nbname"])}
    C = dict(info=meta, src=dict(), default_src=dict())
    for x in pnb["xarg"]:
        key, val, sweep = x["name"], x["val"], x["sweep"]
        C["default_src"][key] = val
        # no sweep given: sweep over the single default value
        C["src"][key] = f"[{val}]" if sweep is None else sweep
    dump(C, str(fname))

    return C

## `wrapper.py`

In [None]:
#|export    
def create_wrapper(pnb, 
                   fname = "wrapper.py", 
                   xname = "experiment.py", 
                   confspace = "confspace.json", 
                   tpl = "wrapper.tpl"):
    """
    Create the wrapper script `fname` from the template `tpl`.

    The template receives the module name of the experiment (the stem of
    `xname`) as `module` and the name of the confspace file as `confspace`.
    `pnb` is accepted but not used.
    """
    values = dict(module=Path(xname).stem, confspace=confspace)
    return create_script(fname, tpl, vdict=values)

## `runner.sh`

In [None]:
#|export
def create_runner(fname="runner.sh", wrapper="wrapper.py", tpl="run.tpl"):
    """
    Create the runner script `fname` from the template `tpl`; the name of
    the wrapper script is passed to the template as `wrapper`.
    """
    values = {"wrapper": wrapper}
    return create_script(fname, tpl, vdict=values)

## `nbx_bundle/`

In [None]:
#|export
# from distutils.dir_util import copytree
import shutil
from cgnai.utils import run_bash
from importlib import import_module
import sys

def create_bundle(xdir, nbpath=None):
    """
    Creates a nbx-bundle at the given directory
    from a notebook.

    The notebook is either specified by its path, or
    in case of "None" created from the notebook
    the function is called from.

    The bundle consists of `__init__.py`, `experiment.py`, `confspace.json`,
    `wrapper.py` and `runner.sh`. If the notebook's `nbx_meta` has a `src`
    entry and that directory exists next to the notebook, it is copied into
    the bundle, replacing an older copy. A summary of the bundle is printed.
    """

    os.makedirs(xdir, exist_ok=True)
    if nbpath is None: nbpath = ipynbname.path()
    xdir, nbpath = Path(xdir), Path(nbpath)

    pnb = parse_nb(load_nb(nbpath))

    create_script(xdir/"__init__.py", "init.tpl", vdict={})
    create_experiment(pnb, fname=xdir/"experiment.py", tpl="experiment.tpl")
    create_confspace( pnb, fname=xdir/"confspace.json")
    create_wrapper(   pnb, fname=xdir/"wrapper.py", xname="experiment.py", confspace="confspace.json", tpl="wrapper.tpl")
    create_runner(fname=xdir/"runner.sh", wrapper="wrapper.py", tpl="run.tpl")

    # Ask the wrapper for the number of configurations.
    # The path is quoted so that directories containing spaces or shell
    # metacharacters work (assumes `run_bash` passes the string to a shell).
    # Btw: -B suppresses the creation of __pycache__
    wrapper_path = shlex.quote(str(xdir/'wrapper.py'))
    num_confs, = run_bash(f"python -B {wrapper_path} info")
    num_confs = int(num_confs.strip())

    print("Creating bundle:\n")
    if "src" in pnb["nbx_meta"]:
        xsrc = pnb["nbx_meta"]["src"]
        src_from, src_to = nbpath.parent/xsrc, xdir/xsrc
        if os.path.exists(src_from):
            print(f"\t{src_to/'*'}")

            # copytree needs a fresh target, so drop an older copy first
            if os.path.exists(str(src_to)):
                shutil.rmtree(str(src_to))

            shutil.copytree(str(src_from), str(src_to))

    # the experiment file created above is `experiment.py`
    print(f"\t{xdir/'experiment.py'}")
    print(f"\t{xdir/'confspace.json'} (#confs = {num_confs})")
    print(f"\t{xdir/'wrapper.py'}")
    print(f"\t{xdir/'runner.sh'}\n")
    


In [None]:
!rm -r nbx_example/_nbx
create_bundle("nbx_example/_nbx", "nbx_example/test_notebook.ipynb")
!ls nbx_example/_nbx