In [1]:
#| default_exp nb_hooks

In [2]:
#| export
from __future__ import annotations

# Notebook/IPython hooks
> Pure IPython facilities to help us inspect, control, and modify cell outputs. 

In subsequent notebooks I'll add more facilities to help us capture the current state of the notebook (21_nbstate.ipynb) and make the link bidirectional (32_bridget.ipynb).


# Prologue

In [3]:
#| export
import ast
import urllib.parse
import uuid
from collections import deque

import fastcore.all as FC
from fastcore.xml import to_xml
from fasthtml.core import unqid
from IPython.core.error import InputRejected
from IPython.core.getipython import get_ipython
from IPython.display import display
from IPython.display import DisplayHandle
from olio.basic import bundle_path
from olio.basic import val_at as at
from olio.common import AD
from olio.common import update_


In [4]:
#| export
from bridget.bridge import Brd_Mark
from bridget.helpers import bridge_cfg
from bridget.helpers import DEBUG
from bridget.helpers import id_gen
from bridget.nb import NB
from bridget.nb import NBCell
from bridget.nb import NBOutput


In [5]:
import os
import random
import sys
import time
from contextlib import contextmanager
from copy import deepcopy
from typing import Any

import fasthtml.components as ft
from fastcore.test import *
from fasthtml.core import L
from fasthtml.xtend import Script
from IPython.core.displayhook import CapturingDisplayHook
from IPython.core.interactiveshell import InteractiveShell
from IPython.display import HTML
from IPython.display import JSON
from IPython.display import Markdown
from IPython.utils.capture import CapturedIO
from olio.basic import empty
from olio.basic import vals_at
from olio.common import setup_console
from olio.common import shorten


In [6]:
import bridget.fasthtml_patching
from bridget.helpers import skip
from bridget.helpers import DetailsJSON

In [7]:
from fasthtml.components import Details, Summary, Ul, Li, Span
from fasthtml.components import Div, P, Pre, Text, Span, show, B, Details, Pre, A, Br
from fasthtml.xtend import Style


----


In [8]:
In:list
Out:dict
_:Any

In [9]:
os.environ['DEBUG_BRIDGET'] = 'True'

In [10]:
#| exporti

BUNDLE_PATH = bundle_path(__name__)
new_id = id_gen()
_n = '\n'

In [11]:
console, cprint = setup_console(140)
IDISPLAY = display
print(f"{DEBUG()=}")
bridge_cfg

DEBUG()=True


{'auto_show': False, 'auto_mount': False, 'auto_id': False, 'bundle_cfg': {'out_dir': [Path('/Users/vic/dev/repo/project/bridget/bridget/js')], 'rewrite_imports': True, 'import_name': 'brdimport'}, 'bootstrap': False, 'current_did': None}

----

In [12]:
#| export

def _cellid_from(s): return urllib.parse.unquote(urllib.parse.urlparse(s).fragment)
def _nburi_from(s): return urllib.parse.urlparse(s).netloc + urllib.parse.urlparse(s).path
def cellspec_from(s):
    "Split URI `s` into `(path, fragment)`, with the fragment percent-decoded."
    parts = urllib.parse.urlparse(s)
    return parts.path, urllib.parse.unquote(parts.fragment)

In [13]:
nburi, cellid = cellspec_from('/a/b/d#Y114sZmlsZQ==')
nburi, cellid

('/a/b/d', 'Y114sZmlsZQ==')

VSCode cell id format.

# NB update

In [14]:
__nb__ = NB()

In [15]:
#| exporti

@FC.patch
def update(self:NB, cell_id, source=None, execution_count=None, outputs=None):
    "Update the cell found under `cell_id`, or add a new code cell when the lookup is falsy."
    cell = self[cell_id]
    if cell:
        # Known cell: hand the fields over to `update_`, then rebuild the notebook.
        update_(cell, source=source, execution_count=execution_count, outputs=outputs)
        self._rebuild()
        return
    # Unknown cell: it is appended as a code cell; only `source` and `cell_id` are carried over.
    position = len(self.cells)
    cell = NBCell(dict(idx=position, source=source, id=cell_id, cell_type='code'))
    self._apply_diff({"added": [cell], "cellCount": position+1})

In [16]:
nb = NB()
nb.update(cell_id='abc', source='print(1)')  # type: ignore
test_eq(nb[0].source, 'print(1)')

In [17]:
#| exporti

shell = get_ipython()
assert shell is not None

def _get_outputs(exec_result):
    "Return the `execute_result` outputs of `exec_result`: a list holding at most one `NBOutput`."
    # Nothing to report without an execution result or when the cell produced no value.
    if not exec_result or exec_result.result is None: return []
    format_dict, md_dict = shell.display_formatter.format(exec_result.result)  # type: ignore
    if not format_dict: return []
    return [NBOutput(dict(
        output_type='execute_result',
        execution_count=exec_result.execution_count,
        data=format_dict,
        metadata=md_dict,
    ))]

In [18]:
def _update_cell(cell_id, source=None, exec_result=None):
    "Forward `source` plus the execution count and outputs of `exec_result` to `__nb__.update`."
    fields = {}
    if exec_result: fields['execution_count'] = exec_result.execution_count
    if source is not None: fields['source'] = source
    __nb__.update(cell_id, **fields, outputs=_get_outputs(exec_result))  # type: ignore

# CellExecInfo
> IPython cell execution info and \_\_nb\_\_ updater.

Simple IPython event callback that captures cell id and source code of last run cell.

**NOTE**: in this notebook, \_\_nb__ is *NOT* a valid notebook state yet. It doesn't reflect markdown cells or deleted cells or cells order. For a valid, updated in real-time, `nbformat` compliant notebook state (`NB`), see [21_nb_state.ipynb](21_nb_state.ipynb).

In [19]:
__cellinfo__: AD
__lastcellinfo__: AD

In [20]:
#| export

class CellExecInfo:
    """Track the running cell through IPython's `pre_run_cell`/`post_run_cell` events.

    While started, the user namespace holds `__cellinfo__` (source, cell id and execution result of the
    cell being run) and `__lastcellinfo__` (the previous value of `__cellinfo__`); every change is also
    forwarded to `_update_cell`.
    """
    def __init__(self, start=False):
        # Flags first: `stop()` (and therefore `__del__`) must be safe even if the lookups below fail.
        self._active = self._hooked = False
        self._evts, self._ns = get_ipython().events, get_ipython().user_ns  # type: ignore
        self._invscode = '__vsc_ipynb_file__' in self._ns
        if start: self.start()
    @property
    def active(self):
        "True once a `pre_run_cell` event has been seen after `start()`, until `stop()`."
        return self._active
    def start(self):
        "Register the `pre_run_cell` callback and reset the namespace entries; no-op once a cell has been seen."
        if self._active: return
        self._evts.register('pre_run_cell', self.pre_run_cell)
        # Remember the registration so `stop()` can undo it even before the first cell runs.
        self._hooked = True
        self._ns.update(__cellinfo__=AD(), __lastcellinfo__=None)
    def stop(self):
        "Unregister whatever callbacks are registered and set both namespace entries to None."
        if not (self._hooked or self._active): return
        was_active = self._active
        self._active = self._hooked = False
        self._evts.unregister('pre_run_cell', self.pre_run_cell)
        # `post_run_cell` is only registered by the first `pre_run_cell` call.
        if was_active: self._evts.unregister('post_run_cell', self.post_run_cell)
        self._ns['__cellinfo__'] = None
        self._ns['__lastcellinfo__'] = None
    def __del__(self): self.stop()

    def _getid(self, info):
        "Cell id for `info`: a fresh `unqid()` when the front-end sent none, the URI fragment when in VSCode."
        cell_id = info.cell_id
        if not cell_id: return unqid()
        return _cellid_from(cell_id) if self._invscode else cell_id
    def pre_run_cell(self, info):
        "`pre_run_cell` callback: rotate `__cellinfo__` into `__lastcellinfo__` and record the new cell."
        if not self._active:
            # `post_run_cell` is hooked on the first cell seen after `start()`, not by `start()` itself,
            # so a cell that merely calls `start()` produces no `post_run_cell` callback.
            self._evts.register('post_run_cell', self.post_run_cell)
            self._active = True
        try:
            if not info.store_history: return  # debugpy usually
            self._ns['__lastcellinfo__'] = self._ns['__cellinfo__']
            self._ns['__cellinfo__'] = cellinfo = AD(source=info.raw_cell, cell_id=self._getid(info),
                                                     exec_result=AD(result=None))
            _update_cell(cellinfo.cell_id, source=cellinfo.source)
        except Exception as e:
            # Best effort: a failure here is printed and must not abort the cell.
            print(e)
    def post_run_cell(self, result):
        "`post_run_cell` callback: copy the execution outcome into `__cellinfo__.exec_result`."
        if not (self._active and result.info.store_history): return
        try:
            if info := self._ns['__cellinfo__']:
                info.exec_result.update(
                    execution_count=result.execution_count,
                    error_before_exec=result.error_before_exec,
                    error_in_exec=result.error_in_exec,
                )
                # info.exec_result.result may have been updated elsewhere (capture...), so don't overwrite it
                if result.result is not None and info.exec_result.result is None:
                    info.exec_result.result = result.result
                _update_cell(info.cell_id, exec_result=result)
        except Exception as e:
            # Best effort, same as in `pre_run_cell`.
            print(e)


def get_info() -> AD:
    "Return the `__cellinfo__` entry of the IPython user namespace."
    user_ns = get_ipython().user_ns  # type: ignore
    return user_ns['__cellinfo__']
def get_lastinfo() -> AD:
    "Return the `__lastcellinfo__` entry of the IPython user namespace."
    user_ns = get_ipython().user_ns  # type: ignore
    return user_ns['__lastcellinfo__']

__csi__ = None
def get_csi(start=False):
    "Return the module-wide `CellExecInfo`, creating it on first use; with `start=True` also start it."
    global __csi__
    csi = __csi__
    if csi is None: csi = __csi__ = CellExecInfo()
    if start: csi.start()
    return csi

In [21]:
try: csi.stop()  # type: ignore
except Exception: pass

csi = get_csi(True)
test_eq(__cellinfo__, {})
test_eq(__lastcellinfo__, None)

In [22]:
show(DetailsJSON(__cellinfo__))
test_eq(__cellinfo__.source[:29], "show(DetailsJSON(__cellinfo__")
show(DetailsJSON(__nb__[0]))
17

17

In [23]:
show(DetailsJSON(__lastcellinfo__, openall=True))
show(__nb__[__lastcellinfo__.cell_id])

`__cellinfo__` stores information about **current** cell execution, `interactiveshell.ExecutionInfo`. `__cellinfo__.exec_result` stores the result of the cell execution, `interactiveshell.ExecutionResult`, only valid **after** the cell run.

In [24]:
(ri := random.randint(0, sys.maxsize))

1334979731030279006

In [25]:
show(DetailsJSON(__cellinfo__))
show(DetailsJSON(__lastcellinfo__, openall=True))
show(__nb__[__lastcellinfo__.cell_id])
test_eq(__lastcellinfo__.source, "(ri := random.randint(0, sys.maxsize))")

In [26]:
print(__cellinfo__.cell_id)
test_eq(__cellinfo__.source[:27], "print(__cellinfo__.cell_id)")
test_eq(In[-1][:27], "print(__cellinfo__.cell_id)")
(exe_cnt := len(In)-1)

X45sZmlsZQ==


26

In [27]:
print(_, __lastcellinfo__.exec_result.result)
test_eq(_, __lastcellinfo__.exec_result.result)
test_eq(__lastcellinfo__.exec_result.result, Out[exe_cnt])

26 26


In [28]:
display('abc')

'abc'

In [29]:
print(_, __lastcellinfo__.exec_result.result)
test_eq(None, __lastcellinfo__.exec_result.result)

26 None


In [30]:
print('13')
23

13


23

In [31]:
print(_, __lastcellinfo__.exec_result.result)
test_eq(23, __lastcellinfo__.exec_result.result)

23 23


In [32]:
csi.stop()
test_is(__cellinfo__, None)
test_is(__lastcellinfo__, None)

`__cellinfo__.source` corresponds to `In[-1]` or `_ih[-1]`.  
`__lastcellinfo__.exec_result.result` corresponds to `_` or `Out[__lastcellinfo__.exec_result.execution_count]`.

~~But we don't want the result of the last cell, we want the result of the current cell. For that, keep reading.~~

**Caveat**: the front-end is not required to send the cell id. See [nbformat Cell ids](https://nbformat.readthedocs.io/en/latest/format_description.html#cell-ids), [run_cell](https://ipython.readthedocs.io/en/stable/api/generated/IPython.core.interactiveshell.html#IPython.core.interactiveshell.InteractiveShell.run_cell).  
`VSCode` reports nbformat of newly created notebook as 4.4, but it does send the cell id, though not well formed.  
`nbclassic` does not set the cell ID even if the reported nbformat version is 4.5. Bridget creates one in this case.

In [33]:
for c in __nb__.cells: display(c)

# autoid (not used)

Note: `autoid` here is different from the fasthtml `fh_cfg['auto_id']` option. Here we're trying to automatically set the `id` attribute of the wrapper element of a cell output in the front-end. If we can do so, we'll be able to target specific cell outputs.

In [34]:
# %%vfile autoid.js

# // debugger;
# function autodel(id) {
#     // debugger;
#     const me = document.querySelector(`#${id}`);
#     const parent = me.parentElement;
#     // parent.append(`test ${id}`);
#     window.setTimeout(() => {{ 
#         me.remove(); 
#         parent.setAttribute("id", `output-${id}`);
#         console.log('deleted script', id); 
#     }}, 100);
#     console.log("test script", id);
# }

In [35]:
# sc = '''
# function autodel(id) {
#     // debugger;
#     const me = document.querySelector(`#${id}`);
#     const parent = me.parentElement;
#     // parent.append(`test ${id}`);
#     window.setTimeout(() => {{ 
#         me.remove(); 
#         parent.setAttribute("id", `output-${id}`);
#         console.log('deleted script', id); 
#     }}, 100);
#     console.log("test script", id);
# }
# '''

# # autodelscr = Script(sc)
# autodelscr = ScriptV('vfile:autoid.js')
# HTML(to_xml(autodelscr))

In [36]:
# sc = '''
# autodel("{0}");
# '''

# idx = new_id()
# scr = Script(notdebug(sc.format(idx)), id=idx, type='module')
# HTML(to_xml(scr)+'aaaa')

In [37]:
# sc = '''
# autodel("{0}");
# '''
# def autoid(idx=None):
#     idx = idx or new_id()
#     return Script(notdebug(sc.format(idx)), id=idx, type='module'), idx

In [38]:
# HTML('aaaaa'+to_xml(autoid()[0]))

In [39]:
# %%HTML
#   <script id="asdfg-12345">
#     debugger;
#     me().attribute('id', 'output-asdfg-12345');
#     setTimeout(el => { el.remove(); }, 100, me('#asdfg-12345'))
#     // autodel('asdfg-12345');
#   </script>
#   <div class="uploader">aaaa</div>

In [40]:
__autoid_scr = '''
// debugger;
me().attribute('id', 'output-{0}').classAdd('bridge');
setTimeout(el => {{ el.remove(); }}, 100, me('#{0}'))
'''
def autoid(idx=None):
    "Build the auto-id `Script` for `idx` (a fresh id when `idx` is falsy) and return `(script, idx)`."
    if not idx: idx = new_id()
    code = __autoid_scr.format(idx)
    return Script(code, id=idx), idx

In [41]:
scr, idx = autoid()
dhdl = DisplayHandle(idx)
dhdl.display(HTML('bbbb'+to_xml(scr)))

Inspect "bbbb" output of previous cell, the parent element should have class "bridge" and id.

In [42]:
dhdl.update(HTML('cccc'+to_xml(scr)))

VSCode wipes out the element when updating the cell. We need to send the autoid again.

## DisplayId

An attempt to provide an IPython display wrapper that automatically handles the display ID, allowing us to target specific cells. Not working; for now it's essentially just IPython display.

In [43]:
class DisplayId(DisplayHandle):
    "A `DisplayHandle` that always has a display id, shows objects as HTML and remembers the last one shown."
    def __init__(self, display_id=None):
        if not display_id: display_id = new_id()
        super().__init__(display_id)
        self._contents = None
        # The auto-id script is only appended when `bridge_cfg.auto_id` is set at construction time.
        self._sc = ''
        if bridge_cfg.auto_id: self._sc = to_xml(autoid(self.display_id)[0])

    def display(self, obj='', **kwargs):
        "Display `str(obj)` (plus the auto-id script, if any) as HTML under this handle's display id."
        from IPython.display import display as ipy_display
        self._contents = str(obj)
        markup = HTML(self._contents + self._sc)
        ipy_display(markup, display_id=self.display_id, **kwargs)

    def update(self, obj='', **kwargs):
        "Same as `display`, always passing `update=True`."
        self.display(obj, **{**kwargs, 'update': True})

    def contents(self):
        "Return the string passed to the last `display`/`update` call (None before the first one)."
        return self._contents

In [44]:
dhdl = DisplayId()
dhdl.display('dddd')
test_eq(dhdl.contents(), 'dddd')

In [45]:
dhdl.update('eeee')
test_eq(dhdl.contents(), 'eeee')

In [46]:
dhdl.update()

# Bridged: IPython display_pub hook
> `display_pub` hook for `display_id` and `brd-mark`  
> Tag cell outputs with bridge metadata to target them.

In particular, it will transform every [display message](https://github.com/ipython/ipython/blob/6c8484878fd186dafaf344b8af467e646e91827d/IPython/core/display_functions.py#L85) to [transient](https://jupyter-client.readthedocs.io/en/stable/messaging.html#update-display-data) if the message has a session metadata id (`brd_did`). It will set the `display_id` of each output to that same `brd_did` value. For HTML display objects, it also marks the DOM parent element in the front-end. With this (session unique) tag, we can easily address specific outputs from Python.

This will be handy to target specific cell outputs when we can capture the notebook state down the road.

NOTE: `display_pub` hooks are [thread dependent](https://github.com/ipython/ipykernel/blob/8322a7684b004ee95f07b2f86f61e28146a5996d/ipykernel/zmqshell.py#L75). Here we assume we only set the hook from the main thread.

In [47]:
#| exporti

def _update_cell(cell_id, source=None, exec_result=None, display_cnt=None, output_did=None):
    """Record in `__nb__` what happened to cell `cell_id`.

    `source` resets the cell (run cell), `display_cnt` is the content of a display message, `exec_result`
    supplies the execution count and result. With `output_did`, the last output whose `did` matches is
    updated in place instead of appending a new one.
    """
    if source is not None:  # only with run cell, reset outputs
        __nb__.update(cell_id, source=source, execution_count=None, outputs=[])  # type: ignore
    cell = __nb__[cell_id]
    if not cell: return
    outputs = cell.get('outputs', [])
    upd = {'outputs': outputs}
    if display_cnt is not None:
        data = display_cnt['data']
        md = display_cnt['metadata'].copy()
        did = at(display_cnt, 'transient.display_id', None)
        if did: md['brd_did'] = did
        target = FC.last(outputs, lambda o: o.did == output_did) if output_did else None
        if target:
            # NOTE(review): data and metadata are merged into the existing output, not replaced,
            # so MIME types absent from the update are kept. Confirm this is intended.
            target.data.update(**data)
            target.metadata.update(**md)
        else:
            outputs.append(NBOutput(dict(output_type='display_data', data=data, metadata=md)))
    if exec_result is not None:
        upd['execution_count'] = exec_result.execution_count
        outputs.extend(_get_outputs(exec_result))
    __nb__.update(cell_id, **upd)  # type: ignore

In [48]:
#| export

class Bridged:
    "Augment display messages with bridge stuff."
    def __init__(self, start=False):
        self._active, self._ns = False, get_ipython().user_ns  # type: ignore
        if start: self.start()
        else: self._reset()
    @property
    def active(self):
        "True while the `display_pub` hook is registered."
        return self._active
    def _reset(self):
        "Forget the recorded display handles and debug messages."
        self.dhs = deque(maxlen=100)
        # Always present, so switching DEBUG on after construction cannot break `bridged`.
        self.msgs = []
    def start(self):
        "Register the `display_pub` hook (once) and reset the recorded state."
        if not self._active:
            self._active = True
            get_ipython().display_pub.register_hook(self.bridged)  # type: ignore
        self._reset()
    def stop(self):
        "Unregister the `display_pub` hook; recorded handles are kept."
        if self._active:
            self._active = False
            get_ipython().display_pub.unregister_hook(self.bridged)  # type: ignore
    def __del__(self): self.stop()

    @property
    def dh(self):
        "Most recent display handle, or None when there is none."
        return self.dhs[-1] if self.dhs else None
    def bridged(self, msg):
        """`display_pub` hook: tag `display_data`/`update_display_data` messages and mirror them in `__nb__`.

        The message is modified in place and always returned.
        """
        if not self._active: return msg
        if DEBUG(): self.msgs.append(msg)
        if (msg_type := msg['msg_type']) not in ('display_data', 'update_display_data'): return msg
        content, did = msg['content'], None
        d, md = content['data'], content['metadata']
        skip = at(md, 'bridge.skip', False)
        if _BRDD_MIMES & d.keys() and not skip:
            trn = content['transient']
            # A transient dict without `display_id` is tolerated (treated as "no id yet").
            if trn: did = trn.get('display_id')
            elif md:
                # Iterate over a snapshot: `md` is modified when `brd_did` is hoisted.
                for k,v in list(md.items()):
                    # Metadata values are not necessarily dicts (e.g. `{'isolated': True}`); skip those.
                    if not isinstance(v, dict): continue
                    if v.get('skip'): skip=True; break
                    if brd_id := v.get('brd_did'):  # hoist `brd_did` one level
                        md['brd_did'] = did = brd_id
                        del v['brd_did']
                        if not v: del md[k]
                        break
            if not did and bridge_cfg.auto_id and not skip: did = new_id()
            if did:
                if 'display_id' not in trn: trn['display_id'] = did
                if not self.dhs or (did != self.dhs[-1].display_id): self.dhs.append(DisplayHandle(display_id=did))
                # add brd-mark to HTML output
                if 'text/html' in d: d['text/html'] += _n + to_xml(Brd_Mark(id=did))
        if info := self._ns.get('__cellinfo__'):
            if msg_type == 'display_data':
                _update_cell(info.cell_id, display_cnt=content)
            else:  # update_display_data
                if did and (cell := __nb__.cell_by_did(did)):  # type: ignore
                    _update_cell(cell.id, display_cnt=content, output_did=did)
        return msg


_BRDD_MIMES = {'text/html', 'text/markdown', 'application/json', 'application/javascript', 'text/plain'}


__bridged__ = None
def get_bridged(start=False):
    "Return the module-wide `Bridged`, creating it on first use; with `start=True` also start it."
    global __bridged__
    # NOTE(review): the cell-info tracker is started on every call, even when `start` is False.
    get_csi(True)
    brd = __bridged__
    if brd is None: brd = __bridged__ = Bridged()
    if start: brd.start()
    return brd

In [49]:
try: 
    get_csi().stop()
    brdd.stop()  # type: ignore
except Exception: pass

__nb__ = NB()  # type: ignore

bridge_cfg.auto_id = False
brdd = get_bridged(True)

### set display_id with display object metadata

```json
{
    ...,
    'msg_type': 'display_data',
    'content': {
        'data': {
            'text/plain': '<IPython.core.display.HTML object>', 
            'text/html': "<div>I'm marked!... MAAARKED!!</div>"
        },
        'metadata': {'text/html': {'brd_did': 'b8b568b9a-c02e1576-c3a3c120-167cedda'}},
        'transient': {}
    },
    'metadata': {}
}

{
    ...,
    'msg_type': 'display_data',
    'content': {
        'data': {
            'text/plain': '<IPython.core.display.HTML object>',
            'text/html': '<div>I\'m marked!... MAAARKED!!</div><brd-mark id="b8b568b9a-c02e1576-c3a3c120-167cedda"></brd-mark>'
        },
        'metadata': {'text/html': {'brd_did': 'b8b568b9a-c02e1576-c3a3c120-167cedda'}},
        'transient': {'display_id': 'b8b568b9a-c02e1576-c3a3c120-167cedda'}
    },
    'metadata': {}
}
```

In [50]:
did = new_id()
print(f"{did=}")
display(HTML("<div>I'm marked!... MAAARKED!!</div>", metadata={'brd_did': did}))
info = __cellinfo__; cell1  = __nb__[info.cell_id]; cell1_copy = cell1.copy()
test_eq(cell1.dids, [did])  # NOTE: this is only valid after the `display(...)` call in above line
21

did='bef65b738-b562617c-fb9f4e11-50d3b9be'


21

In [51]:
display(cell1_copy)
cell1

At cell runtime, the current cell can be accessed as `__nb__[__cellinfo__.cell_id]`; after cell execution, it can be accessed as `__nb__[__lastcellinfo__.cell_id]`.

Note `NBCell` instance lifecycle:
- Before code execution: an instance is created with `source` and `id`
- After `display` statement: the instance is updated with `display_data` output
- After cell run: the instance is updated with `execute_result` output

In [52]:
test_is('<div>I\'m marked!... MAAARKED!!</div>' in cell1.outputs[0]['data']['text/html'], True)
dh = brdd.dhs[-1]
test_eq(dh.display_id, did)

For convenience, `Bridged` stores in `dhs` the last display handles used.

```json
{
    ...,
    'msg_type': 'update_display_data',
    'content': {
        'data': {
            'text/plain': '<IPython.core.display.HTML object>', 
            'text/html': "<div>I'm doomed!... DOOOOOMED!!</div>"},
        'metadata': {},
        'transient': {'display_id': 'b5c00d851-9da4a95e-36473c24-3d04534d'}
    },
    'metadata': {}
}

{
    ...,
    'msg_type': 'update_display_data',
    'content': {
        'data': {
            'text/plain': '<IPython.core.display.HTML object>',
            'text/html': '<div>I\'m doomed!... DOOOOOMED!!</div><brd-mark id="b5c00d851-9da4a95e-36473c24-3d04534d"></brd-mark>'
        },
        'metadata': {},
        'transient': {'display_id': 'b5c00d851-9da4a95e-36473c24-3d04534d'}
    },
    'metadata': {}
}
```

In [53]:
dh.update(HTML("<div>I'm doomed!... DOOOOOMED!!</div>"))
cell2 = __nb__[__cellinfo__.cell_id]
test_is(hasattr(cell2, 'did'), False)
htmls = vals_at(cell1, 'outputs.*.data.text/html', True)
test_is(any('MAAARKED' in v for v in htmls), False)
test_is(any('DOOOOOMED' in v for v in htmls), True)

In [54]:
display(cell1)
cell2

## set display_id with display call metadata
> i.e., `display(..., display_id=True)` or `display(..., display_id="...")`

In [55]:
dh = display(HTML("<div>I'm marked!... MAAARKED!!</div>"), display_id=True)
cell = __nb__[__cellinfo__.cell_id]
test_eq(at(cell, 'outputs.0.metadata.brd_did'), brdd.dh.display_id)  # type: ignore
cell

```json
{
    ...,
    'msg_type': 'display_data',
    'content': {
        'data': {
            'text/plain': '<IPython.core.display.HTML object>', 
            'text/html': "<div>I'm marked!... MAAARKED!!</div>"
        },
        'metadata': {},
        'transient': {'display_id': '2307db4acc4fda0ba305ffdda518748a'}
    },
    'metadata': {}
}

{
    ...,
    'msg_type': 'display_data',
    'content': {
        'data': {
            'text/plain': '<IPython.core.display.HTML object>',
            'text/html': '<div>I\'m marked!... MAAARKED!!</div><brd-mark id="2307db4acc4fda0ba305ffdda518748a"></brd-mark>'
        },
        'metadata': {'brd_did': '2307db4acc4fda0ba305ffdda518748a'},
        'transient': {'display_id': '2307db4acc4fda0ba305ffdda518748a'}
    },
    'metadata': {}
}
```


In [56]:
brdd.dh.update(HTML("<div>I'm doomed!... DOOOOOMED!!</div>"))  # type: ignore
cell

```json
{
    ...,
    'msg_type': 'update_display_data',
    'content': {
        'data': {'text/plain': '<IPython.core.display.HTML object>', 'text/html': "<div>I'm doomed!... DOOOOOMED!!</div>"},
        'metadata': {},
        'transient': {'display_id': 'c3d21633d341d2463f13ef40730e8c4a'}
    },
    'metadata': {}
}

{
    ...,
    'msg_type': 'update_display_data',
    'content': {
        'data': {
            'text/plain': '<IPython.core.display.HTML object>',
            'text/html': '<div>I\'m doomed!... DOOOOOMED!!</div><brd-mark id="c3d21633d341d2463f13ef40730e8c4a"></brd-mark>'
        },
        'metadata': {},
        'transient': {'display_id': 'c3d21633d341d2463f13ef40730e8c4a'}
    },
    'metadata': {}
}
```


In [57]:
display('aaaa', display_id=new_id());

In [58]:
brdd.dh.update(HTML("bbbb"))  # type: ignore

In [59]:
display('cccc', metadata={'bridge': {'brd_did': new_id()}})

In [60]:
brdd.dh.update(HTML("dddd"))  # type: ignore

In [61]:
display('eeee', metadata={'test/plain': {'brd_did': new_id()}})
test_eq(__nb__[__cellinfo__.cell_id].outputs[0].data['text/plain'], "'eeee'")

ffff

In [62]:
brdd.dh.update(Markdown("ffff"))  # type: ignore
test_eq(__nb__[__cellinfo__.cell_id].outputs, [])

## Multi objects display

In [63]:
dh = display(
    HTML("<div>Multi 1</div>"), 
    HTML("<div>Multi 2</div>"), 
    display_id=True)

In [64]:
(cell := __nb__[__lastcellinfo__.cell_id])

In [65]:
if dh: dh.update(HTML("<div>Multi 3</div>"))
cell

When using transient display messages with the `display` function, multi objects display is ill-defined.  

IPython `display` assigns the same `display_id` to each object. The front-end, however, will handle it differently.

`VSCode` displays all objects but only considers the last one transient.  
`Lab`/`Notebook` repeats the last object as many times as the number of objects sent.

In [66]:
display(
    HTML("<div>Multi 1</div>", metadata={'brd_did': (did1 := new_id())}), 
    HTML("<div>Multi 2</div>", metadata={'brd_did': (did2 := new_id())})
)

In [67]:
cell = __nb__[__lastcellinfo__.cell_id]
test_eq(cell.dids, [did1, did2])

In [68]:
dh1, dh2 = brdd.dhs[-2], brdd.dhs[-1]
test_eq(dh1.display_id, did1)
test_eq(dh2.display_id, did2)
dh1.update(HTML("<div>Multi 3</div>"))
time.sleep(0.01)
dh2.update(HTML("<div>Multi 4</div>"))

We can sidestep the issue by using specific Bridge metadata.

## skip

In [69]:
display(Markdown("Skipped"), metadata={'bridge': {'skip': True}})
display("Me too", JSON({"And me": True}), metadata=skip())

Skipped

'Me too'

<IPython.core.display.JSON object>

In [70]:
__nb__[__lastcellinfo__.cell_id]

Skip tagging all display objects.

In [71]:
display(
    HTML("Skipped", metadata={'skip':True, 'brd_did':(did1 := new_id())}),
    HTML("Not me", metadata={'brd_did':(did2 := new_id())}),
)

In [72]:
cell = __nb__[__lastcellinfo__.cell_id]
test_eq(cell.dids, [did2])
cell

Skip specific display object.

## bridge_cfg.auto_id

If `bridge_cfg.auto_id` is True, there's no need to use bridge metadata. Every supported `display`ed object (see `_BRDD_MIMES`) will receive an auto-generated display id.

Caveat: be aware that VSCode limits the number of transient display ids (1000 last time I checked); not Jupyter, I believe.

In [73]:
bridge_cfg.auto_id = True

display(HTML("<div>I'm auto-id'd--</div>"))

In [74]:
brdd.dhs[-1].update(HTML("<div>--as shown above.</div>"))

In [75]:
display(Markdown(f"## did\n???"))

<IPython.core.display.JSON object>

In [76]:
dh = brdd.dhs[-1]
dh.update(HTML(f"<b>did</b>: {dh.display_id}"))

In [77]:
dh.update(JSON({'did': dh.display_id}))

In [78]:
dhs = []
for i in range(5): 
    display(f'{i=}')
    dhs.append(brdd.dhs[-1])

'i+1=1'

'i+1=2'

'i+1=3'

'i+1=4'

'i+1=5'

In [79]:
for i in range(5): 
    dhs[i].update(f'{i+1=}')

In [80]:
brdd.dhs.clear()
brdd.stop()
csi.stop()
bridge_cfg.auto_id = False

----

In [81]:
def show_msgs(brdd: Bridged):
    """Pretty-print the messages recorded in `brdd.msgs`.

    Bookkeeping fields and `text/plain` are left out and long `text/html` is shortened. The recorded
    messages themselves are not modified: trimming happens on a copy.
    """
    dropped = ('parent_header', 'header', 'tracker', 'msg_id')
    for msg in brdd.msgs.copy():
        d = {k: v for k, v in msg.items() if k not in dropped}
        if not d.get('metadata'): d.pop('metadata', None)
        content = d.get('content')
        if isinstance(content, dict):
            # Deep copy: `data` is shared with the recorded message and must stay intact.
            d['content'] = content = deepcopy(content)
            data = content.get('data')  # not every message type carries `data`
            if isinstance(data, dict):
                data.pop('text/plain', None)
                if h := data.get('text/html'): data['text/html'] = shorten(h, 'r', 120)
        cprint(d)

if DEBUG(): show_msgs(brdd)

In [82]:
for c in __nb__.cells: display(c)

# OutputCapture

Bridget's goal is to control (at least) all HTML output. Bridge can now set the metadata of any display message that goes through display_pub, so it captures every `display` call, whether direct or through the FastHTML bridge.  
But there's another way to produce output that doesn't follow the display_pub path: the auto-display of a cell's final expression. That goes through a different code path, display_hook.  
Here Bridget leverages IPython's own capture mechanism to intercept cell results and redirect them to display, a path that Bridge already controls.

Bridge just captures cell outputs, not stdout/err (yet)

In [83]:
def _transform(lines):
    """Input transformer function.

    Wraps the whole cell in a `get_capturer()(<cell source>)` call so it runs under the output capturer.
    Cells are returned untouched while a capture is already running, once a debugpy snippet has been
    seen, or when they look like front-end/kernel housekeeping code.
    """
    cpt = get_capturer()
    if not lines or cpt._capturing or cpt._debugging: return lines
    if DEBUG(): cpt._lines.append(lines)
    first = lines[0]
    second = lines[1] if len(lines) > 1 else ''  # single-line cells have no second line
    if first.startswith('import debugpy'):
        # Also covers the more specific snippets 'import debugpy;debugpy.listen(' and
        # 'import debugpy' + 'debugpy.debug_this_thread()'. The flag is never reset here.
        cpt._debugging = True
        return lines
    if first.startswith('def __jupyter_exec_background__()'): return lines
    if first.startswith('import builtins') and second.startswith('import ipykernel'): return lines
    if first.startswith('import os as _VSCODE_os') and second.startswith('_VSCODE_fileList ='): return lines
    return ['get_capturer()(%r)\n' % ''.join(lines)]
_transform.has_side_effects = False


class OutputCapture:
    """Run cells with `sys.displayhook` swapped for a capturing hook and re-emit the result via `display`.

    Once started, `_transform` is installed as a post input transformer; it rewrites a cell into a
    `get_capturer()(<cell source>)` call, which ends up in `__call__`.
    """
    shell: InteractiveShell
    def __init__(self):
        super().__init__()
        self._active, self.shell = False, get_ipython()  # type: ignore
        # Debug-only history of captures and of the cells seen by `_transform`.
        if DEBUG(): self._captures = deque(maxlen=100); self._lines = deque(maxlen=100)
        # `_capturing`: a capture is in progress; `_debugging`: set by `_transform` on debugpy snippets.
        self._capturing, self._debugging, self.run_outputs = False, False, []
        # The hook is handed the very same `run_outputs` list that `_capture` clears and exposes.
        self.displayhook = CapturingDisplayHook(shell=self.shell, outputs=self.run_outputs)
    @property
    def active(self): return self._active
    def start(self):
        "Publish `get_capturer` in the user namespace and install `_transform`; no-op when already active."
        if self._active: return
        self._active = True
        # The code generated by `_transform` looks up `get_capturer` in the user namespace.
        self.shell.user_ns['get_capturer'] = get_capturer
        if DEBUG(): self._captures = deque(maxlen=100)
        # shell.input_transformer_manager.line_transforms.append(_transform)
        self.shell.input_transformers_post.append(_transform)
    def stop(self):
        "Remove `_transform` from the post input transformers; no-op when not active."
        if not self._active: return
        self._active = False
        # try: shell.input_transformer_manager.line_transforms.remove(_transform)
        try: self.shell.input_transformers_post.remove(_transform)
        except (ValueError, NameError): pass
    # Stop on garbage collection.
    def __del__(self): self.stop()

    @contextmanager
    def _capture(self):
        "Context manager: install `self.displayhook` as `sys.displayhook` and yield a `CapturedIO` over `run_outputs`."
        self.run_outputs.clear()
        try: 
            save_display_hook, sys.displayhook = sys.displayhook, self.displayhook
            self._capturing = True
            yield CapturedIO(stdout=None, stderr=None, outputs=self.run_outputs)
        finally: 
            # Always restore the previous hook, even if the cell raised.
            self._capturing = False
            sys.displayhook = save_display_hook
    def __call__(self, cell):
        "Run the source `cell` under capture, store its output in `__cellinfo__.exec_result.result` and display it."
        # NOTE(review): `info` is None when `__cellinfo__` is missing or None; `info.cell_id` would then raise.
        info: AD = self.shell.user_ns.get('__cellinfo__')  # type: ignore
        with self._capture() as io:
            # Nested run: `_transform` returns the lines untouched while `_capturing` is set.
            self.shell.run_cell(cell, cell_id=info.cell_id)
        if DEBUG(): self._captures.append([cell, io._outputs.copy()])
        if io._outputs: 
            assert len(io._outputs) <= 1, "Only one output is supported"
            # The raw captured entry is stored as the result; the `io.outputs` counterpart is displayed.
            info.exec_result.result = io._outputs[-1]
            display(io.outputs[-1], metadata={'bridge': {'captured': True}})
    
__capturer__ = None
def get_capturer(start:bool=False):
    "Return the module-wide `OutputCapture`, creating it on first use; with `start=True` also start it."
    global __capturer__
    # NOTE(review): the cell-info tracker is started on every call, even when `start` is False.
    get_csi(True)
    capturer = __capturer__
    if capturer is None: capturer = __capturer__ = OutputCapture()
    if start: capturer.start()
    return capturer

In [84]:
# Reset: stop whatever hooks are currently active before reconfiguring.
get_csi().stop()
get_bridged().stop()
get_capturer().stop()

bridge_cfg.auto_id = True

# Fresh notebook model for the demo cells that follow.
__nb__ = NB()  # type: ignore

# NOTE(review): `True` is presumably the start flag here, as it is for `get_capturer(start=...)`.
brdd = get_bridged(True)

# `True` starts the capturer (see `get_capturer(start=...)`).
cptr = get_capturer(True)

In [85]:
1+3

In [86]:
info = __lastcellinfo__
test_eq(__nb__[__lastcellinfo__.cell_id].outputs[0].data, {'text/plain': '4'})
test_eq(len(brdd.dhs), 1)
show(DetailsJSON(__lastcellinfo__, openall=True))

`__cellinfo__.result` has a valid value only **after** the `display(...)` call. The cell with `1+3` captures the output and then displays it with `display(...)`. That occurs after the cell is executed. So, `__cellinfo__.result` is `None` during the cell execution. It's only possible to get the output after the cell has run.

In [87]:
HTML('<div>asdf</div>')

In [88]:
if DEBUG(): 
    output_data = get_capturer()._captures[-1][1][0]['data']
    test_eq(output_data['text/plain'], '<IPython.core.display.HTML object>')
    html = output_data['text/html']
    test_eq(f'<brd-mark id="{brdd.dh.display_id}"' in html, True)  # type: ignore
test_eq(len(brdd.dhs), 3)

In [89]:
print(10)
17

10


In [90]:
1/0

ZeroDivisionError: division by zero

In [91]:
get_bridged() .stop()
get_csi().stop()
get_capturer().stop()

In [92]:
brdd.dhs[0].update(HTML("<div>I was 4, now I'm 44</div>"))

In [93]:
brdd.dhs[2].update(HTML("<div>I was asdf, now I'm qwer</div>"))

In [94]:
brdd.dh.update(HTML("<div>I was 17, now I'm 177</div>"))  # type: ignore

----

In [95]:
def show_captures():
    "Print every entry of the capturer's `_captures` history, each followed by a separator line."
    for captured in get_capturer()._captures:
        cprint(captured)
        print('---------')

show_captures()

---------


---------


---------


---------


---------


---------


---------


In [96]:
def show_lines():
    "Print every entry of the capturer's `_lines` history via `shorten(..., 'r', 140)`, each followed by a separator line."
    for line in get_capturer()._lines:
        shortened = shorten(line, 'r', 140)
        cprint(shortened)
        print('---------')

show_lines()

---------


---------


---------


---------


---------


---------


---------


---------


---------


---------


---------


---------


---------


---------


In [97]:
if DEBUG(): show_msgs(brdd)  # type: ignore

# CaptureTransformer
> Output capture with AST hooks.

`OutputCapture` works correctly, but conflicts with the debugger abound because it alters the source code. Fortunately, IPython has another, much more powerful hook mechanism, `ast_transformers`, that is cleaner and more flexible.

In [98]:
def _ast_process_result(result):
    "Record a non-None cell result on `__cellinfo__` and display it tagged as captured."
    if result is None: return None
    shell = get_ipython()
    if not shell: return result  # no IPython shell: hand the value back untouched
    cellinfo = shell.user_ns['__cellinfo__']
    # avoid storing the cell info inside itself when the cell's value *is* the cell info
    if result is not cellinfo: cellinfo.exec_result.result = result
    display(result, metadata={'bridge': {'captured': True}})

In [99]:
#| export

class CaptureTransformer(ast.NodeTransformer):
    """AST transformer that routes a cell's trailing expression through `_ast_process_result`.

    mode: 'direct' rewrites the last expression statement `expr` into `_ast_process_result(expr)`;
          any other value first assigns `expr` to a temporary variable and then passes that
          variable to `_ast_process_result` in an extra trailing statement.
    """
    # Marker used by `start` to evict previously registered capture transformers.
    __is_transformer__ = True

    def __init__(self, mode='direct'):
        self.mode, self._active, self._temp_var_prefix = mode, False, f"_ast_cap_{uuid.uuid4().hex[:8]}_"
        self._global_function_name = '_ast_process_result'
        self._transform = self._transform_expr_direct if mode == 'direct' else self._transform_expr_conditional

    @property
    def active(self):
        "Whether `start` has been called without a matching `stop`."
        return self._active

    def start(self):
        "Register this transformer in the shell, replacing any other flagged transformer."
        if not (shell := get_ipython()) or self._active: return
        self._active = True
        shell.ast_transformers = [t for t in shell.ast_transformers if not getattr(t, '__is_transformer__', False)]
        shell.ast_transformers.append(self)
        # the rewritten code calls the helper by name, so it must live in the user namespace
        shell.user_ns[self._global_function_name] = _ast_process_result

    def stop(self):
        "Unregister this transformer from the shell; does nothing when not active."
        if not self._active: return
        if not (shell := get_ipython()): return
        self._active = False
        # Remove only this instance: an instance that was superseded by a later `start` (and is
        # stopped afterwards, e.g. from `__del__`) must not unregister its successor.
        shell.ast_transformers = [t for t in shell.ast_transformers if t is not self]

    def __del__(self): self.stop()

    def visit_Module(self, node: ast.Module):
        """Rewrite `node` so that its trailing expression statement is handed to the result helper.

        Modules that are empty or do not end in a bare expression are returned as visited.
        Raises `InputRejected` when the trailing expression is a yield / yield from / await.
        """
        node = self.generic_visit(node)  # type: ignore
        if not node.body or not isinstance(node.body[-1], ast.Expr): return node
        last_stmt = node.body[-1]
        expr = last_stmt.value
        if self._should_reject_input(expr): raise InputRejected(f"Cannot transform {type(expr).__name__} expressions")
        self._transform(node, expr)
        # fills in the children of the new statements from their (already located) parents
        ast.fix_missing_locations(node)
        return node

    def _transform_expr_direct(self, node: ast.Module, expr: ast.expr):
        "Replace the last statement with `_ast_process_result(expr)`."
        last_stmt = node.body[-1]
        call_node = ast.Call(func=ast.Name(id=self._global_function_name, ctx=ast.Load()), args=[expr], keywords=[])
        # keep the replaced statement's position so errors are not attributed to line 1
        node.body[-1] = ast.copy_location(ast.Expr(value=call_node), last_stmt)

    def _transform_expr_conditional(self, node: ast.Module, expr: ast.expr):
        "Replace the last statement with `tmp = expr` followed by `_ast_process_result(tmp)`."
        last_stmt = node.body[-1]
        temp_var = f"{self._temp_var_prefix}result"
        assign_node = ast.Assign(targets=[ast.Name(id=temp_var, ctx=ast.Store())], value=expr)
        call_result = ast.Call(func=ast.Name(id=self._global_function_name, ctx=ast.Load()),
                               args=[ast.Name(id=temp_var, ctx=ast.Load())], keywords=[])
        # both generated statements inherit the position of the statement they replace
        node.body[-1] = ast.copy_location(assign_node, last_stmt)
        node.body.append(ast.copy_location(ast.Expr(value=call_result), last_stmt))

    def _should_reject_input(self, expr):
        "True when `expr` is a yield, yield-from or await expression."
        unsafe_types = (ast.Yield, ast.YieldFrom, ast.Await)
        return isinstance(expr, unsafe_types)


__capturer__ = None
def get_capturer(start=False):
    "Return the module-wide `CaptureTransformer` (direct mode), creating it on first use; start it when `start` is true."
    global __capturer__
    get_bridged(True)
    if __capturer__ is None:
        __capturer__ = CaptureTransformer(mode='direct')
    capturer = __capturer__
    if start:
        capturer.start()
    return capturer

In [100]:
# Reset: stop whatever hooks are currently active before reconfiguring.
get_csi().stop()
get_bridged().stop()
get_capturer().stop()

bridge_cfg.auto_id = True

# Fresh notebook model for the demo cells that follow.
__nb__ = NB()  # type: ignore

brdd = get_bridged()

# `True` starts the transformer; `get_capturer` itself calls `get_bridged(True)` first.
cptr = get_capturer(True)
# After `start`, the capturer is the only registered AST transformer.
test_eq(get_ipython().ast_transformers, [cptr])  # type: ignore

In [101]:
# Run the transformer by hand on a small module: only the trailing bare expression is rewritten.
node = cptr.visit_Module(ast.parse('''
x = 10
y = 20
x + y
'''))

test_eq(ast.unparse(node), 'x = 10\ny = 20\n_ast_process_result(x + y)')

In [102]:
x = 10
y = 20
x + y

30

In [103]:
test_is(_ != 30, True)
# test_eq(__lastcellinfo__.exec_result.result, 30)
show(DetailsJSON(__lastcellinfo__, summary='__lastcellinfo__', openall=True))
__nb__[__lastcellinfo__.cell_id]

Note that `CaptureTransformer` changes the semantics of IPython code execution because it effectively disables the [Output caching system](https://ipython.readthedocs.io/en/latest/interactive/reference.html#output-caching-system) by intercepting all cell outputs. ~~If you have any use for `_`|`_<n>`|`_oh`|`Out` variables, `CaptureTransformer` has the same effect as setting `InteractiveShell.cache_size` to 0.~~

During cell execution, IPython replaces `sys.displayhook` with a custom `DisplayHook` instance responsible for displaying the result of the cell execution (among many other things). `Bridget` now handles cell outputs (and does what `displayhook` did before to show the cell result). The shell `displayhook` then always receives a result of None. `Bridget` must replicate some (not sure what to do about the `shell.history_manager`) of the functionality of `displayhook` to ensure that output cache variables are updated correctly (see [\_\_call\_\_](https://github.com/ipython/ipython/blob/4d0c438d617e49b77d68cd98208f7b2d371a1381/IPython/core/displayhook.py#L269)).

In [104]:
#| exporti

def _ast_process_result(result):
    "Record a non-None cell result, display it tagged as captured, and update the shell's output bookkeeping."
    if result is None: return None
    shell = get_ipython()
    if not shell: return result  # no IPython shell: hand the value back untouched
    cellinfo = shell.user_ns['__cellinfo__']
    # avoid storing the cell info inside itself when the cell's value *is* the cell info
    if result is not cellinfo: cellinfo.exec_result.result = result
    # this only runs during cell execution, so use the shell's own hook rather than sys.displayhook
    hook = shell.displayhook  # type: ignore
    if hook.quiet(): return None
    # objects exposing `_ipython_display_` are not passed to `display` here
    self_displaying = getattr(result, '_ipython_display_', None)
    if not self_displaying:
        display(result, metadata={'bridge': {'captured': True}})
    hook.update_user_ns(result)
    hook.fill_exec_result(result)

In [105]:
class TestD:
    "Minimal object that displays itself through `_ipython_display_`."
    def _ipython_display_(self):
        from IPython.display import display
        marker = HTML('from _ipython_display_')
        display(marker)

TestD()

In [106]:
__nb__[__lastcellinfo__.cell_id]

In [107]:
bridge_cfg.auto_show = False

In [108]:
Div('Hey, Foo!')

```html
<div>Hey, Foo!</div>

```

In [109]:
__nb__[__lastcellinfo__.cell_id]

# get_nb_from_hooks

In [110]:
#| exporti

def get_nb_from_hooks() -> NB:
    "Call `get_csi`, `get_bridged` and `get_capturer` with `True`, in that order, then return the module-level `__nb__`."
    for ensure_hook in (get_csi, get_bridged, get_capturer):
        ensure_hook(True)
    return __nb__

In [111]:
#| exporti

# Module-level notebook model returned by `get_nb_from_hooks`.
__nb__ = NB()
if shell := get_ipython():
    # Publish the model to user code unless a `__nb__` already exists in the user namespace.
    if '__nb__' not in shell.user_ns: shell.user_ns['__nb__'] = __nb__
    # Reset the per-cell info slots in the user namespace.
    shell.user_ns['__cellinfo__'] = None
    shell.user_ns['__lastcellinfo__'] = None
    if dformatter := shell.display_formatter:
        # NOTE(review): `_BRDD_MIMES` is only bound on this path (shell with a display formatter);
        # confirm a default exists elsewhere for imports outside IPython.
        _BRDD_MIMES = set(dformatter.format_types)# - {'text/plain'}  # type: ignore

# Optionally start all hooks as soon as this code runs.
if bridge_cfg.bootstrap: get_nb_from_hooks()

In [112]:
_BRDD_MIMES

{'application/javascript',
 'application/json',
 'application/pdf',
 'image/jpeg',
 'image/png',
 'image/svg+xml',
 'text/html',
 'text/latex',
 'text/markdown',
 'text/plain'}

What does this module do?

- Receives IPython's events to record cell info (what the kernel can possibly know before and after the cell run).
- Hooks into the display system to convert all calls into `transient` calls with a display_id connected to the cell_id.
- Transforms cell code with a custom AST transformer to redirect cell result to the display system.

This way, Bridget effectively knows what each cell output is (except stdout/err for now) and how to reference and modify it using standard IPython features.

Is this enough to make Bridget a functional notebook editor? Not really, we need real-time updates of the notebook structure (the notebook state, what would be saved to disk as .ipynb) to be able to navigate the notebook.

That unfortunately requires navigating the procellous waters of widgets and extensions. Good ol' IPython and Jupyter are not designed to give the kernel knowledge about the notebook state in real time. We pythonistas deluded ourselves into thinking Jupyter is all about us, but a Jupyter notebook is really a JavaScript application that controls everything, including the model and the view. The kernel is a second-class citizen that knows next to nothing about the notebook, or even what a notebook is.

We're going to fix that next.

# Colophon
----


In [113]:
import fastcore.all as FC
import nbdev
from nbdev.clean import nbdev_clean

In [114]:
# Only export when `FC.IN_NOTEBOOK` is set (presumably: when running inside a notebook).
if FC.IN_NOTEBOOK:
    nb_path = '15_nb_hooks.ipynb'
    # nbdev_clean(nb_path)
    # Hand this notebook to nbdev's exporter.
    nbdev.nbdev_export(nb_path)