In [None]:
#|hide
#|default_exp nbio

# nbio

> Reading and writing Jupyter notebooks

In [None]:
#|export
from fastcore.imports import *
from fastcore.foundation import *
from fastcore.basics import *
from fastcore.xtras import *

import ast,functools
from pprint import pformat,pprint

In [None]:
#|hide
import tempfile

## Reading a notebook

A notebook is just a json file.

In [None]:
# Read the raw notebook JSON and wrap the top-level dict so its keys can also be read as attributes
minimal_fn = Path('../tests/minimal.ipynb')
minimal_txt = AttrDict(minimal_fn.read_json())

It contains two sections, the `metadata`...:

In [None]:
minimal_txt.metadata

{'kernelspec': {'display_name': 'Python 3 (ipykernel)',
  'language': 'python',
  'name': 'python3'}}

...and, more importantly, the `cells`:

In [None]:
minimal_txt.cells

[{'cell_type': 'markdown',
  'metadata': {},
  'source': ['## A minimal notebook']},
 {'cell_type': 'code',
  'execution_count': None,
  'metadata': {},
  'outputs': [{'data': {'text/plain': ['2']},
    'execution_count': None,
    'metadata': {},
    'output_type': 'execute_result'}],
  'source': ['# Do some arithmetic\n', '1+1']}]

The second cell here is a `code` cell, however it contains no outputs, because it hasn't been executed yet. To execute a notebook, we first need to convert it into a format suitable for `nbclient` (which expects some `dict` keys to be available as attrs, and some available as regular `dict` keys). Normally, `nbformat` is used for this step, but it's rather slow and inflexible, so we'll write our own function based on `fastcore`'s handy `dict2obj`, which makes all keys available as both attrs *and* keys.

In [None]:
#|export
class NbCell(AttrDict):
    "A single notebook cell: the cell's keys plus its position `idx_`, with `source` held as one string"
    def __init__(self, idx, cell):
        "Wrap the `cell` dict found at position `idx` of its notebook"
        super().__init__(cell)
        self.idx_ = idx
        # `source` may arrive as a list of lines (see the raw JSON); normalise it to a single string
        if 'source' in self: self.set_source(self.source)

    def set_source(self, source):
        "Set the cell source from `source` (a `str` or an iterable of `str`) and drop any `_parsed_` key"
        self.source = ''.join(source)
        if '_parsed_' in self: del(self['_parsed_'])

    def parsed_(self):
        "Statement list from `ast.parse` for a code cell; `None` for non-code cells, cells starting with `%` or `!`, and source that fails to parse"
        if self.cell_type!='code' or self.source.strip()[:1] in ['%', '!']: return
        if '_parsed_' not in self: 
            try: self._parsed_ = ast.parse(self.source).body
            except SyntaxError: return # you can assign the result of ! to a variable in a notebook cell, which will result in a syntax error if parsed with the ast module.
        return self._parsed_

    def __hash__(self): return hash(self.source) + hash(self.cell_type)

    def __eq__(self,o):
        "Cells are equal when both `source` and `cell_type` match; `idx_` and all other keys are ignored"
        # An operand without `source`/`cell_type` (e.g. `None`, an int, a plain dict) is "not comparable"
        # rather than an error, so `cell == None` and `cell in some_list` no longer raise `AttributeError`.
        try: return self.source==o.source and self.cell_type==o.cell_type
        except AttributeError: return NotImplemented

    def __ne__(self,o):
        "Inverse of `__eq__`"
        # Without this, `!=` would be answered by the dict base class, which compares every key
        # (including `idx_`) and can therefore disagree with the `==` defined above.
        res = self.__eq__(o)
        return res if res is NotImplemented else not res

We use an `AttrDict` subclass which has some basic functionality for accessing notebook cells.

In [None]:
#|export
def dict2nb(js):
    "Convert dict `js` to an `AttrDict` notebook whose `cells` are `NbCell`s tagged with their position"
    notebook = dict2obj(js)
    # Pair every cell with its index, then build an `NbCell(idx, cell)` from each pair
    indexed_cells = notebook.cells.enumerate()
    notebook.cells = indexed_cells.starmap(NbCell)
    return notebook

We can now convert our JSON into this `nbclient`-compatible format, which pretty prints the source code of cells in notebooks.

In [None]:
# Convert the raw JSON with `dict2nb`, then pick out the cell at index 1 (the code cell) for display
minimal = dict2nb(minimal_txt)
cell = minimal.cells[1]
cell

```json
{ 'cell_type': 'code',
  'execution_count': None,
  'idx_': 1,
  'metadata': {},
  'outputs': [{'data': {'text/plain': ['2']}, 'execution_count': None, 'metadata': {}, 'output_type': 'execute_result'}],
  'source': '# Do some arithmetic\n1+1'}
```

The abstract syntax tree of a code cell's source is available by calling the `parsed_` method:

In [None]:
cell.parsed_(), cell.parsed_()[0].value.op

([<ast.Expr at 0x10f5174c0>], <ast.Add at 0x10ccc8340>)

In [None]:
#|export
def read_nb(path):
    "Read the notebook JSON at `path`, convert it with `dict2nb`, and record `str(path)` under `path_`"
    raw_json = Path(path).read_json()
    notebook = dict2nb(raw_json)
    # Remember the location exactly as the caller supplied it
    notebook['path_'] = str(path)
    return notebook

This reads the JSON for the file at `path` and converts it with `dict2nb`. For instance:

In [None]:
minimal = read_nb(minimal_fn)
str(minimal.cells[0])

"{'cell_type': 'markdown', 'metadata': {}, 'source': '## A minimal notebook', 'idx_': 0}"

The file name read is stored in `path_`:

In [None]:
minimal.path_

'../tests/minimal.ipynb'

## Writing a notebook

In [None]:
#|export
def nb2dict(d, k=None):
    "Convert parsed notebook `d` to plain `dict`s/`list`s; `k` is the key `d` was stored under, if any"
    # A `source` held as one string goes back to a list of lines (line endings kept).
    # A `source` that is still a list is left to the generic list branch below instead of
    # failing on `.splitlines`, so converting an already-plain notebook is harmless.
    if k=='source' and isinstance(d, str): return d.splitlines(keepends=True)
    if isinstance(d, (L,list)): return [nb2dict(o) for o in d]
    if not isinstance(d, dict): return d
    # Keys ending in `_` (such as `idx_` and `path_`) are left out of the result;
    # `endswith` also copes with an empty key, where `key[-1]` would raise `IndexError`.
    return {key: nb2dict(val, key) for key,val in d.items() if not key.endswith('_')}

This returns the exact same dict as is read from the notebook JSON.

In [None]:
# Parse the test notebook with `read_nb`...
minimal_fn = Path('../tests/minimal.ipynb')
minimal = read_nb(minimal_fn)

# ...and check that converting it back with `nb2dict` reproduces the JSON read straight from the file
minimal_dict = minimal_fn.read_json()
assert minimal_dict==nb2dict(minimal)

In [None]:
#|export
def nb2str(nb):
    "Serialise `nb` to a JSON `str` (sorted keys, 1-space indent, non-ASCII kept as-is) ending in a newline"
    # Parsed notebooks (`AttrDict`/`L`) go through `nb2dict` first; anything else is dumped unchanged
    needs_conversion = isinstance(nb, (AttrDict,L))
    plain = nb2dict(nb) if needs_conversion else nb
    body = dumps(plain, sort_keys=True, indent=1, ensure_ascii=False)
    return body + "\n"

To save a notebook we first need to convert it to a `str`:

In [None]:
print(nb2str(minimal)[:45])

{
 "cells": [
  {
   "cell_type": "markdown",


In [None]:
#|export
def write_nb(nb, path):
    "Write `nb` to `path` as UTF-8 text"
    # Serialise first: the target is opened in 'w' mode, so if `nb2str` failed after the
    # open, an existing notebook at `path` would already have been emptied.
    txt = nb2str(nb)
    with maybe_open(path, 'w', encoding='utf-8') as f: f.write(txt)

This writes the exact same string as saved by Jupyter.

In [None]:
# Write into a throwaway directory so that no `tmp.ipynb` in the working directory is overwritten or
# deleted, and so that cleanup cannot fail (and hide the real error) if the file was never created.
with tempfile.TemporaryDirectory() as tmp_dir:
    tmp = Path(tmp_dir)/'tmp.ipynb'
    write_nb(minimal, tmp)
    # `write_nb` writes UTF-8, so read both files back as UTF-8 for the comparison
    expected_txt = minimal_fn.read_text(encoding='utf-8')
    assert expected_txt==tmp.read_text(encoding='utf-8')

## export -

In [None]:
#|hide
#|eval: false
from nbdev.doclinks import nbdev_export
nbdev_export()