In [None]:
#|default_exp doclinks

# doclinks
> Generating a documentation index from a module

In [None]:
#|export
from nbdev.config import *
from nbdev.maker import *
from nbdev.export import *
from nbdev.imports import *

from fastcore.script import *
from fastcore.utils import *
from fastcore.meta import delegates

import ast,contextlib,re
import pkg_resources,importlib
from astunparse import unparse

from pprint import pformat
from urllib.parse import urljoin
from importlib import import_module

In [None]:
#|hide
from IPython.display import Markdown,display
from fastcore.test import *
from pdb import set_trace
from importlib import reload

## Creating the module index

In [None]:
#|export
def _sym_nm(klas, sym): return f'{unparse(klas).strip()}.{sym.name}'

def _binop_leafs(bo, o):
    "Names of `o` qualified by each leaf operand under `BinOp` `bo`, in left-to-right order"
    names = []
    for operand in (bo.left, bo.right):
        # Nested `BinOp`s (e.g. `A|B|C`) are flattened recursively; anything else is a leaf
        if isinstance(operand, ast.BinOp): names += _binop_leafs(operand, o)
        else: names.append(_sym_nm(operand, o))
    return names

def patch_name(o):
    """If `o` is decorated with `patch` or `patch_to`, return its class-prefix name

    Returns `o.name` when `o` is not a (async) function definition or has no usable patch target,
    a `'Class.name'` string for a single target, and a list of such strings when `@patch` is
    used with a union annotation such as `self:A|B`."""
    if not isinstance(o, (ast.FunctionDef,ast.AsyncFunctionDef)): return o.name
    # First decorator whose id (as reported by `decor_id`) starts with 'patch'
    d = first([d for d in o.decorator_list if decor_id(d).startswith('patch')])
    if not d: return o.name
    nm = decor_id(d)
    if nm=='patch':
        # `@patch` names its target class(es) in the annotation of the first parameter;
        # without a parameter or an annotation there is nothing to prefix with
        if not o.args.args: return o.name
        a = o.args.args[0].annotation
        if a is None: return o.name
        if isinstance(a, ast.BinOp): return _binop_leafs(a, o)
    elif nm=='patch_to':
        # Read the target from the decorator that actually matched: `@patch_to(...)` is not
        # necessarily the first decorator on the function
        if not getattr(d, 'args', None): return o.name
        a = d.args[0]
    else: return o.name  # some other decorator whose id merely starts with 'patch'
    return _sym_nm(a,o)

In [None]:
# Helper for the checks below: parse `code` and run `patch_name` on its first top-level statement
def _test_patch(code): return patch_name(ast.parse(code).body[0])
# `@patch`: the class prefix comes from the annotation on the first parameter
s = "@patch\ndef _f(self:_T): ..."
test_eq('_T._f', _test_patch(s))

In [None]:
# `@patch_to`: the class prefix comes from the decorator's first argument
s = "@patch_to(_T)\ndef _g(self): ..."
test_eq('_T._g', _test_patch(s))

In [None]:
# Patching with a union annotation returns a list with one qualified name per class,
# in the order the classes appear in the annotation
s = "@patch\ndef _f(self:_T|_U|_V): ..."
test_eq(_test_patch(s), ['_T._f', '_U._f', '_V._f'])

In [None]:
#|export
def _get_modidx(pyfile, code_root, nbs_path):
    """Get module symbol index for a Python source file

    `pyfile` (a `Path`) is split into cells on `# %% ` markers. Returns
    `{mod_name: {qualified_symbol: (doc_url, source_url)}}` covering the top-level functions and
    classes of every cell whose header does not start with `auto`, plus the functions and classes
    found directly inside each top-level class body."""
    cfg = get_config()
    # Forward slashes on every platform: `rel_name` feeds both the dotted module name and a URL
    rel_name = pyfile.resolve().relative_to(code_root).as_posix()
    mod_name = '.'.join(rel_name.rpartition('.')[0].split('/'))  # module name created by pyfile
    # https://regex101.com/r/3jT381/1
    # Must use [0::2] here as re.split returns capturing group info inline, but we are using
    # a negative lookbehind and don't care about the group, so we skip each of them
    cells = re.split(r'(?<!(f\'\'\'|f"""))\n# %% ', Path(pyfile).read_text())[0::2]
    nburl =  urljoin(cfg.doc_host,cfg.doc_baseurl)
    source = f'{cfg.git_url}/blob/{cfg.branch}/{rel_name}'  # same for every symbol in this file

    _def_types = ast.FunctionDef,ast.AsyncFunctionDef,ast.ClassDef
    d = {}
    for cell in cells[1:]:  # First cell is autogenerated header
        top,*rest = cell.splitlines() # First line is cell header
        nb = top.split()[0]
        if nb == 'auto': continue
        nbpath = ((pyfile.parent)/nb).resolve()  # NB paths are stored relative to .py file
        loc = nbpath.relative_to(nbs_path).with_suffix('.html').as_posix()
        # Strip `<digits><alnum>*_` prefixes (e.g. `01_`) from the path, lowercase it, and
        # leave a trailing '#' so the symbol anchor can be appended
        loc = urljoin(nburl, re.sub(r'\d+[a-zA-Z0-9]*_', '', loc).lower()) + '#'

        def _stor(nm, pre=''):
            # `patch_name` returns a list for a union-annotated `@patch`: store one entry per name
            for n in ([nm] if isinstance(nm, str) else nm): d[f'{mod_name}{pre}.{n}'] = loc+n.lower(),source

        for tree in ast.parse('\n'.join(rest)).body:
            if isinstance(tree, _def_types): _stor(patch_name(tree))
            if isinstance(tree, ast.ClassDef):
                # Members are indexed as `<module>.<Class>.<member>`
                for t2 in tree.body:
                    if isinstance(t2, _def_types): _stor(t2.name, f'.{tree.name}')
    return {mod_name: d}

In [None]:
#|export
def _build_modidx(dest=None, nbs_path=None, skip_exists=False):
    """Create _modidx.py

    `dest` is the library folder to index (default: the config's `lib_path`), `nbs_path` the
    notebooks folder (default: the config's `nbs_path`). With `skip_exists`, an existing
    `_modidx.py` is left untouched. Does nothing when the `IN_TEST` environment variable is set."""
    if dest is None: dest = get_config().path('lib_path')
    dest = Path(dest)  # accept a plain string as well as a `Path`
    nbs_path = Path(nbs_path or get_config().path('nbs_path')).resolve()
    if os.environ.get('IN_TEST',0): return
    idxfile = dest/'_modidx.py'
    if skip_exists and idxfile.exists(): return
    # The index is always rebuilt from scratch: remove any previous file and start empty
    with contextlib.suppress(FileNotFoundError): idxfile.unlink()
    res = dict(syms={}, settings={})
    res['settings'] = {k:v for k,v in get_config().d.items()
                       if k in ('doc_host','doc_baseurl','lib_path','git_url','branch')}
    code_root = dest.parent.resolve()
    # Raw string: `\.` is a regex escape, not a Python string escape
    for file in globtastic(dest, file_glob="*.py", skip_file_re='^_', skip_folder_re=r"\.ipynb_checkpoints"):
        res['syms'].update(_get_modidx((dest.parent/file).resolve(), code_root, nbs_path=nbs_path))
    idxfile.write_text("# Autogenerated by nbdev\n\nd = "+pformat(res, width=140, indent=2, compact=True))

In [None]:
# _build_modidx()

In [None]:
#| hide
import shutil

In [None]:
#| hide
#| eval:false
# Start from a clean scratch folder; a missing `tmp` is fine
with contextlib.suppress(FileNotFoundError): shutil.rmtree('tmp')

# Export two test notebooks into `tmp`
everything_fn = '../tests/01_everything.ipynb'
nb_export('../tests/00_some.thing.ipynb', 'tmp')
nb_export(everything_fn, 'tmp')

# Build `tmp/_modidx.py`, resolving notebook paths against the tests folder
mod_fn = Path('tmp')
_build_modidx(mod_fn, nbs_path=Path('../tests/').resolve())

# Load the generated index and show the entries recorded for module `tmp.some.thing`
d = exec_import('tmp._modidx', 'd')['d']
d['syms']['tmp.some.thing']

{'tmp.some.thing.h_n': ('https://nbdev.fast.ai/everything.html#h_n',
  'https://github.com/fastai/nbdev/blob/master/tmp/some/thing.py')}

In [None]:
#|export
@delegates(globtastic, but=['file_glob', 'skip_folder_re', 'skip_file_re'])
def nbglob(path=None, skip_folder_re = '^[_.]', file_glob='*.ipynb', skip_file_re='^[_.]',
           recursive=True, key='nbs_path', as_path=False, **kwargs):
    "Find all files in a directory matching an extension given a config key."
    # Fall back to the config path stored under `key` when no explicit `path` is given
    path = Path(path or get_config().path(key))
    # `recursive=None` means "use the `recursive` setting from the config"
    if recursive is None: recursive=str2bool(get_config().recursive)
    # Forward the resolved flag rather than a hard-coded `True`, so callers can disable recursion
    res = globtastic(path, file_glob=file_glob, skip_folder_re=skip_folder_re,
                     skip_file_re=skip_file_re, recursive=recursive, **kwargs)
    # Optionally convert each result to a `Path`
    return res.map(Path) if as_path else res

In [None]:
#|export
def nbglob_cli(
    path:str=None, # Path to notebooks
    symlinks:bool=False, # Follow symlinks?
    file_glob:str='*.ipynb', # Only include files matching glob
    file_re:str=None, # Only include files matching regex
    folder_re:str=None, # Only enter folders matching regex
    skip_file_glob:str=None, # Skip files matching glob
    skip_file_re:str='^[_.]', # Skip files matching regex
    skip_folder_re:str = '^[_.]', # Skip folders matching regex
    recursive:bool=True): # Search subfolders too
    "Find all files in a directory matching an extension given a config key."
    # Every option is passed through to `nbglob`, including `recursive`, which was previously dropped
    return nbglob(path, symlinks=symlinks, file_glob=file_glob, file_re=file_re, folder_re=folder_re,
                  skip_file_glob=skip_file_glob, skip_file_re=skip_file_re, skip_folder_re=skip_folder_re,
                  recursive=recursive)

In [None]:
#|export
@call_parse
@delegates(nbglob_cli)
def nbdev_export(
    path:str=None, # Path or filename
    **kwargs):
    "Export notebooks in `path` to Python modules"
    # Nothing is exported when the `IN_TEST` environment variable is set
    if os.environ.get('IN_TEST',0): return
    # Export every notebook matched by `nbglob`
    for nb in nbglob(path=path, **kwargs): nb_export(nb)
    # Then run `add_init` on the library folder and regenerate `_modidx.py`
    lib_path = get_config().path('lib_path')
    add_init(lib_path)
    _build_modidx()

In [None]:
#|hide
from fastcore.test import *

In [None]:
#|export
def _settings_libs():
    "Libraries whose symbols may be looked up without a module prefix, as a list of names"
    try: # settings.ini doesn't exist yet until you call nbdev_new
        cfg = get_config()
        # `strip_libs` if configured, else `lib_path`, else 'nbdev'; split on whitespace into a list
        return cfg.get('strip_libs', cfg.get('lib_path', 'nbdev')).split()
    # Same type as the normal path: a list, not a bare string
    except FileNotFoundError: return ['nbdev']

In [None]:
#|export
class NbdevLookup:
    "Mapping from symbol names to docs and source URLs"
    def __init__(self, strip_libs=None, incl_libs=None, skip_mods=None):
        if strip_libs is None: strip_libs = _settings_libs()
        skip_mods = setify(skip_mods)
        strip_libs = L(strip_libs)
        # When restricting to `incl_libs`, the stripped libraries are always included as well
        if incl_libs is not None: incl_libs = (L(incl_libs)+strip_libs).unique()
        # Dict from lib name to _nbdev module for incl_libs (defaults to all)
        entries = {}
        for ep in pkg_resources.iter_entry_points(group='nbdev'):
            if incl_libs is not None and ep.dist.key not in incl_libs: continue
            entries[ep.name] = ep.load()
        self.entries = entries
        # Merge the per-module symbol dicts of every loaded library
        all_syms = merge(*L(o['syms'].values() for o in self.entries.values()).concat())
        for lib in strip_libs:
            if lib not in self.entries: continue
            # Add the library's symbols again with the `<module>.` prefix removed,
            # leaving out modules listed in `skip_mods`
            short = {}
            for mod,dets in self.entries[lib]['syms'].items():
                if mod in skip_mods: continue
                for k,v in dets.items(): short[remove_prefix(k,f"{mod}.")] = v
            all_syms = merge(short, all_syms)
        self.syms = all_syms

    def __getitem__(self, s):
        "Entry stored for symbol `s`, or `None` when `s` is unknown"
        return self.syms.get(s)

    def doc(self, s):
        "First element of the entry for `s` when the entry is a tuple, otherwise the entry itself"
        entry = self[s]
        if isinstance(entry, tuple): return entry[0]
        return entry

Symbol names are taken from libraries registered using the 'nbdev' entry point. By default, all libraries with this entry point are searched, but full symbol names (including module prefix) are required.

In [None]:
# Default lookup: `strip_libs` comes from `_settings_libs()`
c = NbdevLookup()
# Fully qualified names resolve
assert c.doc('nbdev.doclinks.NbdevLookup').startswith('http')
assert c.doc('numpy.array').startswith('http')
# A bare name resolves for nbdev's own symbols but not for numpy's
assert c.doc('NbdevLookup').startswith('http')
assert not c.doc('array')

Pass `strip_libs` to list libraries which should be available without requiring a module prefix.

In [None]:
# With `nbdev_numpy` in `strip_libs`, the bare name `array` now resolves
c = NbdevLookup(strip_libs=['nbdev', 'nbdev_numpy'])
assert c.doc('array').startswith('http')

nbdev itself includes `nbdev_lookup`, an instantiated `NbdevLookup` with `strip_libs=nbdev`.

In [None]:
# A default-constructed lookup resolves nbdev's own symbols without a module prefix
assert NbdevLookup().doc('NbdevLookup').startswith('http')

## Backticks

In [None]:
#|export
@patch
def _link_sym(self:NbdevLookup, m):
    "Markdown link for the symbol captured by match `m`, or the matched text unchanged if it has no doc URL"
    sym = m.group(1)
    url = self.doc(sym)
    # Unknown symbol: give back the whole match (backticks included) untouched
    if url is None: return m.group(0)
    # Every backslash in the link text is doubled
    escaped = sym.replace('\\', r'\\')
    return rf"[`{escaped}`]({url})"

# A backtick-delimited run containing no backticks or whitespace; group 1 is the text between the backticks
_re_backticks = re.compile(r'`([^`\s]+)`')
@patch
def link_line(self:NbdevLookup, l):
    "Run `_link_sym` over every backticked symbol in the single line `l`"
    return _re_backticks.sub(self._link_sym, l)

@patch
def linkify(self:NbdevLookup, md):
    "Apply `link_line` to each line of markdown `md`, skipping fenced blocks and lines indented by four spaces"
    # Empty or missing input yields `None`
    if not md: return
    out,fenced = [],False
    for line in md.splitlines():
        # A line starting with three backticks toggles the fence state and is itself left as-is
        if line.startswith("```"): fenced = not fenced
        elif not (fenced or line.startswith('    ')): line = self.link_line(line)
        out.append(line)
    return '\n'.join(out)

In [None]:
# Sample markdown for `linkify`: backticked symbols in running text, an HTML <code> tag,
# a four-space-indented line and a fenced block
md = """This is a link to `numpy.array` and to `get_config` but not a link to `foobar`.
And not a link to <code>dict2nb</code>.

    This is not a link to `get_config`

```
This isn't a link to `get_config` either
```"""

In [None]:
#|eval: false
# The positional argument is `strip_libs`; render the linkified sample as markdown
c = NbdevLookup('nbdev')
Markdown(c.linkify(md))

This is a link to [`numpy.array`](https://numpy.org/doc/stable/reference/generated/numpy.array.html#numpy.array) and to [`get_config`](https://nbdev.fast.ai/config.html#get_config) but not a link to `foobar`.
And not a link to <code>dict2nb</code>.

    This is not a link to `get_config`

```
This isn't a link to `get_config` either
```

## Export -

In [None]:
#|eval: false
#|hide
# Delete the generated module first, then re-export
Path('../nbdev/export.py').unlink(missing_ok=True)
nbdev_export()

# `nbdev.export` must be importable again and expose `nb_export`
g = exec_new('import nbdev.export')
assert hasattr(g['nbdev'].export, 'nb_export')
# The regenerated index must hold a doc URL for `NbdevLookup`
from nbdev._modidx import d
assert d['syms']['nbdev.doclinks']['nbdev.doclinks.NbdevLookup'][0].startswith('http')