# `nbexport` — Notebook exporter
> Export python modules from notebooks without the full overhead of `nbdev`. 

This module is based on the export logic in [nbdev](https://github.com/fastai/nbdev). Nbdev is great but it's a little too heavyweight. The customizations to the quarto integration are too much for me. I'd like to continue using sphinx/autodoc for docs. I like to have code in python modules but often I want to develop code in a notebook.


These functions can collect code from the notebooks in a directory and write python modules.  Like nbdev, any code cell containing an import statement is written to the module, as well as any code cell marked with the `#|export` directive.



**eg:** This notebook is the source for the file `nbexport.py`

In [None]:
#|default_exp: nbexport

In [None]:
import nbformat
import itertools
import os
import re

# nbexport Directives

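Directives are written at the top of each cell and tell nbexport what to do with the code. They are very similar to those of `nbdev`. Each directive is a line starting with `#|`, optionally followed by `: value` (for example `#|default_exp: nbexport`).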

### The directives supported by nbexport

| directive | meaning |
|-----------|---------|
| `default_exp: module` | set the default module targeted by the code cells in this notebook |
| `export[: module]` | export the code in this cell (to `module` if given, otherwise to the default module) |
| `noexport` | do not export the code in this cell, even if it contains an import statement |
| `skip_notebook` | do not process any further cells in this notebook |



In [None]:
#|export

from enum import Enum


class Directive(Enum):
    """Cell directives understood by nbexport.

    Each member's name is the keyword written after `#|` at the top of a cell.
    """
    default_exp = 1
    export = 2
    noexport = 3
    skip_notebook = 4

# Develop using sample data from this notebook

Use this notebook to dogfood nbexport and create the nbexport.py module. Nbformat reads a notebook into a list of cell dictionaries. Load this notebook with nbformat and use it as an example...

Two interesting cells are loaded: `first_code_cell` and `first_cell_with_directive`

In [None]:
# Load this notebook's own file so its cells can serve as sample data below.
nb=nbformat.read("nbs/01_NbExport.ipynb", nbformat.NO_CONVERT)

### What does the imported notebook look like?

In [None]:
nb.keys()

dict_keys(['cells', 'metadata', 'nbformat', 'nbformat_minor'])

In [None]:
len(nb.cells)

43

In [None]:
nb.cells[0]

{'cell_type': 'markdown',
 'id': '75918589-7592-4c9c-8a87-6a3afce041b4',
 'metadata': {},
 'source': '# `nbexport` — Notebook exporter\n> Export python modules from notebooks without the full overhead of `nbdev`. '}

### Import sample cells for testing

In [None]:
# Sample cell: the first cell in the notebook whose cell_type is 'code'.
first_code_cell=[cell for cell in nb.cells if cell.cell_type=='code'][0]
first_code_cell

{'cell_type': 'code',
 'execution_count': None,
 'id': '4907343b-a001-4cf8-a591-f296665133e2',
 'metadata': {},
 'outputs': [],
 'source': '#|default_exp: nbexport'}

In [None]:
# Sample cell: the first code cell whose source begins with a `#|` directive line.
first_cell_with_directive=[cell for cell in nb.cells if cell.cell_type=='code' and cell.source.startswith("#|")][0]

### split_metadata
> Return the metadata directives from the top of a cell

In [None]:
#|export
def split_metadata(code):
    """
    Split a code cell into its leading directive lines and the remaining code.

    Lines are consumed from the top for as long as they start with `#|`.
    Returns a tuple `(directives, rest)`: `directives` is the list of those
    lines with the `#|` prefix removed, and `rest` is every following line
    joined with newlines (an empty string when nothing follows).
    """
    all_lines = code.splitlines()

    # Index of the first line that is not a directive; defaults to "past the
    # end" when every line (or no line at all) is a directive.
    split_at = len(all_lines)
    for idx, text in enumerate(all_lines):
        if not text.startswith("#|"):
            split_at = idx
            break

    directives = [text[2:] for text in all_lines[:split_at]]
    remainder = '\n'.join(all_lines[split_at:])
    return directives, remainder


In [None]:
split_metadata(first_code_cell.source)

(['default_exp: nbexport'], '')

In [None]:
split_metadata(first_cell_with_directive.source)

(['default_exp: nbexport'], '')

### metadata_dict
> Parse metadata directives into a dictionary

In [None]:
#|export
def metadata_dict(metadata_lines):
    """
    Turn directive lines (without the `#|` prefix) into a dictionary.

    Each line is split at its first colon. The text before the colon, stripped
    of surrounding whitespace, is the key; the stripped text after it is the
    value. A line without a colon maps its key to None.
    """
    parsed = {}
    for entry in metadata_lines:
        name, colon, argument = entry.partition(":")
        # Only a line that actually contains a colon carries a value.
        parsed[name.strip()] = argument.strip() if colon else None
    return parsed

In [None]:
metadata, source = split_metadata(first_cell_with_directive.source)
metadata_dict(metadata)

{'default_exp': 'nbexport'}

### parse_code_cell
> split metadata directives and parse

In [None]:
#|export
def parse_code_cell(cell):
    """
    Parse a notebook code cell into `(directives, code)`.

    `directives` is the dictionary built from the `#|` lines at the top of
    `cell.source`; `code` is the rest of the cell source.
    """
    directive_lines, body = split_metadata(cell.source)
    directives = metadata_dict(directive_lines)
    return directives, body

In [None]:
parse_code_cell(first_code_cell)

({'default_exp': 'nbexport'}, '')

In [None]:
#|export
# Marker that the export step swaps for the regular main-guard name.
# It is assembled from pieces so that its full text never appears literally in
# this cell; otherwise the export step, which replaces every occurrence of the
# marker in exported code, would rewrite this definition as well.
NOTEBOOK_MAIN_STR = "".join(("__", "notebook_export_main", "__"))

### scan_notebooks
> Read a list of notebook files and collect sources into modules

In [None]:
#|export
def _module_filename(name):
    """Return *name* as a module file name, adding ".py" unless already present."""
    return name if name.endswith(".py") else name + ".py"


def scan_notebooks(file_list, modules=None):
    """
    Read notebook files and collect the code to export, grouped by module.

    Parameters:
        file_list: iterable of notebook paths, each read with nbformat.read.
        modules: optional dictionary from an earlier scan to extend in place;
            a new one is created when omitted.

    Returns:
        The dictionary mapping each module file name (e.g. "nbexport.py") to
        the list of code-cell sources, directive lines removed, that belong in
        it, in the order the cells were encountered.

    Raises:
        ValueError: if a `default_exp` directive is given without a module name.

    For every notebook the default module starts as "main.py" and is changed
    by a `default_exp` directive. A code cell is collected when it carries
    `default_exp` or `export`, or when it contains an import statement;
    `noexport` suppresses the cell and `skip_notebook` ends the notebook.
    """
    if modules is None:
        modules = {}

    # The trailing \b keeps identifiers that merely start with "import"
    # (e.g. `important = 1`) from being mistaken for an import statement.
    import_pattern = re.compile(r"^[\s]*(from .* )?import\b", re.MULTILINE)

    for nbfile in file_list:
        default_module = 'main.py'
        nb = nbformat.read(nbfile, nbformat.NO_CONVERT)
        for cell in nb.cells:
            if cell.cell_type != 'code':
                continue
            metadata, source = parse_code_cell(cell)
            if Directive.skip_notebook.name in metadata:
                break

            export_module = None

            if Directive.default_exp.name in metadata:
                target = metadata[Directive.default_exp.name]
                if not target:
                    raise ValueError(
                        f"{nbfile}: the default_exp directive requires a module name"
                    )
                default_module = _module_filename(target)
                # The cell that sets the default is itself exported to it.
                export_module = default_module

            if Directive.noexport.name in metadata:
                continue

            if Directive.export.name in metadata:
                explicit = metadata[Directive.export.name]
                # An explicit target gets the same ".py" treatment as
                # default_exp, so both directives name modules the same way.
                export_module = _module_filename(explicit) if explicit else default_module
            elif import_pattern.search(source):
                export_module = default_module

            if export_module:
                modules.setdefault(export_module, []).append(source)

    return modules


In [None]:
#|export
# Header written at the top of every generated module unless the caller
# supplies its own preamble.
DEFAULT_PREAMBLE = (
    "#!/usr/bin/env python3\n"
    "# AUTOMATICALLY GENERATED FILE - Do not edit\n"
    "#\n"
    "# This file was generated from jupyter notebooks\n"
)

### write_modules
> Given a modules dictionary, write out the python sources

In [None]:
#|export
def write_modules(module_dir, modules, preamble=DEFAULT_PREAMBLE):
    """
    Once notebooks have been scanned, export the resulting modules.

    Parameters:
        module_dir: directory to write into; created if it does not exist.
            An empty string writes into the current directory.
        modules: dictionary mapping a module file name to its list of source
            strings, as returned by scan_notebooks.
        preamble: text written at the top of every generated file.

    Every occurrence of the NOTEBOOK_MAIN_STR marker in the sources is
    replaced by '__main__'. Each file consists of the preamble followed by the
    sources separated by blank lines, and is written as UTF-8.
    """
    if module_dir:
        # exist_ok avoids the check-then-create race of a separate isdir test.
        os.makedirs(module_dir, exist_ok=True)

    for module, sources in modules.items():
        sources = [source.replace(NOTEBOOK_MAIN_STR, '__main__') for source in sources]

        # Explicit encoding: the output must not depend on the platform's
        # default text encoding.
        with open(os.path.join(module_dir, module), "w", encoding="utf-8") as f:
            print(f"Writing {module}")
            f.write(preamble)
            f.write("\n\n")
            f.write("\n\n".join(sources))
            # End the file with a newline, as text files conventionally do.
            f.write("\n")
            

### export_notebook_dir
> Read the notebooks in a directory and export to python

In [None]:
#|export
def export_notebook_dir(nb_dir, module_dir="lib"):
    """
    Enumerate the notebooks in nb_dir and export them all.

    Parameters:
        nb_dir: directory whose `.ipynb` files are exported (not recursive).
        module_dir: directory the generated modules are written to.

    Notebooks are processed in sorted file-name order. os.listdir returns
    entries in arbitrary order, and that order decides the order of the code
    in the generated modules, so sorting keeps the output reproducible.
    """
    notebooks = [
        os.path.join(nb_dir, name)
        for name in sorted(os.listdir(nb_dir))
        if name.endswith(".ipynb")
    ]
    export_notebooks(notebooks, module_dir)

### export_notebooks
> Read a list of notebooks and export to python

In [None]:
#|export
def export_notebooks(notebooks, module_dir):
    """
    Export the files listed in *notebooks*.

    The notebooks are scanned for exportable code and the collected modules
    are then written into *module_dir*.
    """
    write_modules(module_dir, scan_notebooks(notebooks))
    

## How to write main

Writing a traditional Python `__main__` guard wouldn't work because it would also run in the notebook. Instead, use the special symbol `__notebook_export_main__`, which will be replaced by `__main__` during export:

```python

# runs in module and notebook
if __name__ == "__main__":
    do_main_thing()
    
# runs in module
if __name__ == "__notebook_export_main__":
    do_main_thing()
```

In [None]:
#|export
def main():
    """
    Command-line entry point: export the notebooks named on the command line.

    Usage: [--help] [-d <export_dir>] notebook_file...

    Exits with status 0 after printing help on request, and with status 1
    when called without arguments, when `-d` has no directory argument, or
    when no notebook files are given.
    """
    import sys
    USAGE=f"""
    nbexport: export one or more notebooks as python modules
    {os.path.basename(sys.argv[0])} [--help] [-d <export_dir>] notebook_file...
    """
    export_dir="."
    args = sys.argv[1:]

    if len(args)==0 or args[0] in ('-h', '--help', '-H'):
        print(USAGE)
        print("Directives:")
        print("\n".join(d.name for d in Directive))
        # Asking for help is not an error; calling with no arguments is.
        sys.exit(0 if args else 1)

    if args[0] == "-d":
        if len(args) < 2:
            # Without this check a bare "-d" fails with an IndexError.
            print("error: -d requires a directory argument", file=sys.stderr)
            print(USAGE, file=sys.stderr)
            sys.exit(1)
        export_dir = args[1]
        args = args[2:]

    if not args:
        # Nothing left to export after option parsing.
        print("error: no notebook files given", file=sys.stderr)
        print(USAGE, file=sys.stderr)
        sys.exit(1)

    print(f'exporting {", ".join(args)}')
    print(f'export dir: {export_dir}')
    export_notebooks(args, export_dir)
    

In [None]:
#|export
# The marker string compared against below is replaced by write_modules on
# export, turning this into the regular main guard of the generated module.
# Inside the notebook the comparison is false, so main() does not run here.
if __name__ == "__notebook_export_main__":
    main()

### Export this notebook to a python module

In [None]:
export_notebooks(["nbs/01_NbExport.ipynb"], "src/nbexport")

Writing nbexport.py


In [None]:
export_notebook_dir("nbs", "src")

Writing nbexport.py
