Skip to content

Commit

Permalink
Add glue and paste functionality (#66)
Browse files Browse the repository at this point in the history
committed on behalf of @choldgraf 

This adds prototype "glue and paste" functionality to MyST-NB. It closes #4.

You glue things into a notebook's metadata like this:

```python
from myst_nb import glue
glue("your-key", an_object)
```

And it will run IPython's display on the object, then store the mimebundle
at the key you specify.

When the notebooks are parsed with `MyST-NB`, it builds up a registry of all
the keys across all notebooks, so that you can then refer to them in the following ways:

You can paste it in markdown with a directive like this:

````
```{paste} your-key
```
````

Or you can add it in-line like this:

```
{paste}`your-key`
```
optionally:

```
{paste}`your-key:format-string`
```

See documentation for more details
  • Loading branch information
choldgraf committed Mar 14, 2020
1 parent e79a627 commit b46c2f1
Show file tree
Hide file tree
Showing 14 changed files with 1,066 additions and 28 deletions.
2 changes: 1 addition & 1 deletion docs/use/basic.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6-final"
"version": "3.8.0"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
Expand Down
280 changes: 280 additions & 0 deletions docs/use/glue.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions docs/use/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ basic
interactive
hiding
markdown
glue
```
19 changes: 18 additions & 1 deletion myst_nb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
JupyterCell,
)

# from ipywidgets import embed
from pathlib import Path
import json

from .parser import (
NotebookParser,
Expand All @@ -18,6 +18,7 @@
CellOutputBundleNode,
)
from .transform import CellOutputsToNodes
from .glue import Paste, paste_role, PasteNodesToDocutils


def static_path(app):
Expand All @@ -35,6 +36,17 @@ def update_togglebutton_classes(app, config):
config.togglebutton_selector += f", {selector}"


def init_glue_cache(app):
    """Make sure the build environment has a ``glue_data`` dictionary.

    An existing ``glue_data`` attribute on ``app.env`` is left untouched;
    otherwise it is created as an empty dict.
    """
    environment = app.env
    if hasattr(environment, "glue_data"):
        return
    environment.glue_data = {}


def save_glue_cache(app, env):
    """Dump ``env.glue_data`` as JSON to ``glue_cache.json`` in the doctree dir.

    ``app`` is part of the signature but is not used by the body.
    """
    cache_file = Path(env.doctreedir) / "glue_cache.json"
    with cache_file.open("w") as stream:
        json.dump(env.glue_data, stream)


def setup(app):
"""Initialize Sphinx extension."""
# Allow parsing ipynb files
Expand Down Expand Up @@ -82,13 +94,18 @@ def visit_element_html(self, node):
)

# Register our post-transform which will convert output bundles to nodes
app.add_post_transform(PasteNodesToDocutils)
app.add_post_transform(CellOutputsToNodes)

app.connect("builder-inited", init_glue_cache)
app.connect("builder-inited", static_path)
app.connect("config-inited", update_togglebutton_classes)
app.connect("env-updated", save_glue_cache)
app.add_css_file("mystnb.css")
# We use `execute` here instead of `jupyter-execute`
app.add_directive("execute", JupyterCell)
app.add_directive("paste", Paste)
app.add_role("paste", paste_role)
app.setup_extension("jupyter_sphinx")

return {"version": __version__, "parallel_read_safe": True}
7 changes: 7 additions & 0 deletions myst_nb/_static/mystnb.css
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,10 @@ div.cell_output table {
div.cell_output tbody tr:hover {
background: rgba(66, 165, 245, 0.2);
}


/* Inline text from `paste` operation */

span.pasted-text {
font-weight: bold;
}
238 changes: 238 additions & 0 deletions myst_nb/glue.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
import nbformat as nbf
from pathlib import Path
import json
import IPython
from IPython.display import display as ipy_display
from sphinx.util.docutils import SphinxDirective
from sphinx.transforms import SphinxTransform
from docutils import nodes
from docutils.parsers.rst import directives
from sphinx.util import logging

from .parser import CellNode, CellInputNode, CellOutputBundleNode

# Module logger obtained from ``sphinx.util.logging``; the calls in this module
# pass a ``location`` keyword along with each message.
SPHINX_LOGGER = logging.getLogger(__name__)

# Prefix that ``glue(..., display=False)`` prepends to every mimetype key of the
# bundle it displays. The lookup/paste code strips it again before using the data.
GLUE_PREFIX = "application/papermill.record/"


def glue(name, variable, display=True):
    """Display ``variable`` with scrapbook metadata so it can be found by ``name``.

    Parameters
    ----------
    name : string
        A unique name for the variable. You can use this name to refer to the
        variable later on.
    variable : python object
        The object whose *display* representation should be stored. This is not
        the object itself: the mimebundle produced by IPython's display
        formatters is what gets recorded.
    display : bool
        If True, the mimebundle is emitted under its normal mimetypes. If False,
        every mimetype is prefixed with ``GLUE_PREFIX`` (presumably so that
        front-ends do not render it directly -- confirm against the consumer).
    """
    bundle, meta = IPython.core.formatters.format_display_data(variable)

    if display:
        prefix = ""
    else:
        prefix = GLUE_PREFIX

    # Record the glue name (and the prefix used) in the output's metadata.
    meta["scrapbook"] = {"name": name, "mime_prefix": prefix}

    prefixed_bundle = {}
    for mimetype, payload in bundle.items():
        prefixed_bundle[prefix + mimetype] = payload

    ipy_display(prefixed_bundle, raw=True, metadata=meta)


def read_glue_cache(path):
    """Read a glue cache generated by a Sphinx build.

    Parameters
    ----------
    path : str or Path
        Path to a doctree dir, or directly to a glue cache .json file.

    Returns
    -------
    data : dictionary
        A dictionary containing the JSON data in your glue cache.

    Raises
    ------
    FileNotFoundError
        If no file exists at the resolved cache location.
    """
    path = Path(path)
    if path.is_dir():
        # Assume our folder is doctrees and append the glue data name to it.
        path = path.joinpath("glue_cache.json")
    if not path.exists():
        raise FileNotFoundError(f"A glue cache was not found at: {path}")

    # Use a context manager so the file handle is closed deterministically.
    with path.open() as handle:
        return json.load(handle)


def find_glued_key(path_ntbk, key):
    """Find an output mimebundle in a notebook based on a key.

    Parameters
    ----------
    path_ntbk : path
        The path to a Jupyter Notebook that has variables "glued" in it.
    key : string
        The unique string to use as a look-up in `path_ntbk`.

    Returns
    -------
    mimebundle
        The output mimebundle associated with the given key, with
        ``GLUE_PREFIX`` removed from its mimetype keys.

    Raises
    ------
    KeyError
        If the key is not glued anywhere in the notebook, or if it is glued
        more than once.
    """
    # Read in the notebook
    if isinstance(path_ntbk, Path):
        path_ntbk = str(path_ntbk)
    ntbk = nbf.read(path_ntbk, nbf.NO_CONVERT)

    matches = []
    for cell in ntbk.cells:
        if cell.cell_type != "code":
            continue

        # If we have outputs, look for scrapbook metadata and reference the key
        for output in cell["outputs"]:
            meta = output.get("metadata", {})
            if "scrapbook" not in meta:
                continue
            this_key = meta["scrapbook"]["name"].replace(GLUE_PREFIX, "")
            if this_key != key:
                continue
            # Keep one entry per mimetype, dropping the glue prefix from each.
            matches.append(
                {
                    mimetype.replace(GLUE_PREFIX, ""): val
                    for mimetype, val in output["data"].items()
                }
            )

    # Report the *requested* key: the loop variable may be unbound or may hold
    # an unrelated key when nothing matched.
    if not matches:
        raise KeyError(f"Did not find key {key} in notebook {path_ntbk}")
    if len(matches) > 1:
        raise KeyError(
            f"Multiple variables found for key: {key} in notebook {path_ntbk}"
        )
    return matches[0]


def find_all_keys(ntbk, keys=None, path=None, logger=None):
    """Find all `glue` keys in a notebook and return a dictionary with key: outputs.

    Parameters
    ----------
    ntbk : str, Path or notebook object
        A path to a notebook (read with nbformat) or an already-loaded notebook
        exposing ``cells``.
    keys : dict, optional
        An existing registry to add to. It is updated in place and returned;
        a new dict is created when omitted.
    path : str, optional
        Used as the first element of the ``location`` passed to ``logger``.
    logger : logger, optional
        Object whose ``warning(msg, location=...)`` is called when a key is
        over-written. When omitted the message is printed instead.

    Returns
    -------
    keys : dict
        Mapping of glue name to the full output that carries it.
    """
    if isinstance(ntbk, (str, Path)):
        ntbk = nbf.read(str(ntbk), nbf.NO_CONVERT)

    if keys is None:
        keys = {}

    for cell in ntbk.cells:
        if cell.cell_type != "code":
            continue

        # Tolerate code cells that carry no ``outputs`` entry at all.
        for output in cell.get("outputs", []):
            scrap_meta = output.get("metadata", {}).get("scrapbook", {})
            this_key = scrap_meta.get("name")
            if this_key is None:
                # Not a glued output (or scrapbook metadata without a name).
                continue
            if this_key in keys:
                msg = f"Over-writing pre-existing glue key: `{this_key}`"
                if logger is None:
                    print(msg)
                else:
                    logger.warning(msg, location=(path, None))
            keys[this_key] = output
    return keys


# Role and directive for pasting
class Paste(SphinxDirective):
    """Directive that leaves a ``PasteNode`` placeholder for a glued key.

    The single required argument is the key to paste. No output is looked up
    here; the directive only builds the placeholder node.
    """

    required_arguments = 1
    final_argument_whitespace = True
    has_content = False

    option_spec = {"id": directives.unchanged}

    def run(self):
        """Return a one-element list holding the placeholder node."""
        # TODO: Figure out how to report cell number in the location
        #       currently, line numbers in ipynb files are not reliable
        source, line = self.state_machine.get_source_and_line(self.lineno)
        # Line numbers are dropped for notebooks because they are unreliable.
        reported_line = None if source.endswith(".ipynb") else line
        # The suffix is removed from the path so it is printed properly in logs.
        source_stem = str(Path(source).with_suffix(""))
        placeholder = PasteNode(
            self.arguments[0], "directive", location=(source_stem, reported_line)
        )
        return [placeholder]


def paste_role(name, rawtext, text, lineno, inliner, options={}, content=[]):
    """Role that leaves a ``PasteNode`` placeholder for the key given as ``text``.

    Returns the ``(nodes, messages)`` pair: a single placeholder node and an
    empty message list. ``name``, ``rawtext``, ``options`` and ``content`` are
    accepted but not used by the body.
    """
    source = inliner.document.current_source
    # Line numbers are dropped for notebooks because they are unreliable.
    reported_line = None if source.endswith(".ipynb") else lineno
    source_stem = str(Path(source).with_suffix(""))
    placeholder = PasteNode(text, "role", location=(source_stem, reported_line))
    return [placeholder], []


# Transform to replace nodes with outputs
class PasteNode(nodes.container):
    """Placeholder for a glued output in the Sphinx AST, to be transformed later.

    Attributes set on the instance:

    - ``key``: the text given to the role/directive (may include ``:format``).
    - ``kind``: how the node was created, ``"role"`` or ``"directive"``.
    - ``location``: ``(path, lineno)`` tuple used when logging about this node.
    """

    def __init__(self, key, kind, location=None, rawsource="", *children, **attributes):
        self.key = key
        self.kind = kind
        self.location = location
        # Forward rawsource and children instead of silently discarding them.
        super().__init__(rawsource, *children, **attributes)


class PasteNodesToDocutils(SphinxTransform):
    """Replace every ``PasteNode`` in the document with nodes built from glue data.

    Glued outputs are looked up in ``self.app.env.glue_data``. A node created by
    the role becomes an inline text node; a node created by the directive
    becomes a cell containing the whole output bundle, rendered later.
    Nodes that cannot be resolved are reported through the logger and left
    in place.
    """

    default_priority = 699  # must be applied before CellOutputsToNodes

    def apply(self):
        """Resolve each ``PasteNode`` and swap it for its rendered replacement."""
        glue_data = self.app.env.glue_data
        # Materialise the matches first: nodes are replaced while we iterate.
        for paste_node in list(self.document.traverse(PasteNode)):

            # First check if we have both key:format in the key
            parts = paste_node.key.rsplit(":", 1)
            if len(parts) == 2:
                key, formatting = parts
            else:
                key = parts[0]
                formatting = None

            if key not in glue_data:
                SPHINX_LOGGER.warning(
                    f"Couldn't find key `{key}` in keys defined across all pages.",
                    location=paste_node.location,
                )
                continue

            # Grab the output for this key and replace `glue` specific prefix info.
            # The copy is shallow, so "data" is rebound rather than edited in place.
            output = glue_data.get(key).copy()
            output["data"] = {
                mimetype.replace(GLUE_PREFIX, ""): val
                for mimetype, val in output["data"].items()
            }

            # Roles will be parsed as text, with some formatting fanciness
            if paste_node.kind == "role":
                # Currently only plain text is supported
                if "text/plain" not in output["data"]:
                    SPHINX_LOGGER.warning(
                        f"Couldn't find compatible output format for key `{key}`",
                        location=paste_node.location,
                    )
                    # Nothing to paste: skip instead of reusing a stale/unbound node.
                    continue
                text = output["data"]["text/plain"].strip("'")
                # If formatting is specified, see if we have a number of some kind
                if formatting:
                    try:
                        text = f"{float(text):>{formatting}}"
                    except ValueError:
                        # Not a number (or the format spec does not apply):
                        # paste the text unformatted.
                        pass
                out_node = nodes.inline(text, text, classes=["pasted-text"])
            # Directives will have the whole output chunk deposited and rendered later
            elif paste_node.kind == "directive":
                output_node = CellOutputBundleNode(outputs=[output])
                out_node = CellNode()
                out_node += CellInputNode()
                out_node += output_node
            else:
                SPHINX_LOGGER.error(
                    (
                        "`kind` must be one of `role` or `directive`, "
                        f"not `{paste_node.kind}`"
                    ),
                    location=paste_node.location,
                )
                # No replacement can be built for an unknown kind.
                continue

            paste_node.replace_self(out_node)
34 changes: 33 additions & 1 deletion myst_nb/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class NotebookParser(MystParser):
config_section_dependencies = ("parsers",)

def parse(self, inputstring, document):
from .glue import find_all_keys, GLUE_PREFIX

# de-serialize the notebook
ntbk = nbf.reads(inputstring, nbf.NO_CONVERT)
Expand Down Expand Up @@ -124,14 +125,45 @@ def parse(self, inputstring, document):
except AttributeError:
pass

# Write the notebook's output to disk
# Remove all the mime prefixes from "glue" step.
# This way, writing properly captures the glued images
replace_mime = []
for cell in ntbk.cells:
if hasattr(cell, "outputs"):
for out in cell.outputs:
if "data" in out:
# Only do the mimebundle replacing for the scrapbook outputs
if out.get("metadata", {}).get("scrapbook", {}).get("name"):
out["data"] = {
key.replace(GLUE_PREFIX, ""): val
for key, val in out["data"].items()
}
replace_mime.append(out)

# Write the notebook's output to disk. This changes metadata in notebook cells
path_doc = Path(document.settings.env.docname)
doc_relpath = path_doc.parent
doc_filename = path_doc.name
build_dir = Path(document.settings.env.app.outdir).parent
output_dir = build_dir.joinpath("jupyter_execute", doc_relpath)
write_notebook_output(ntbk, str(output_dir), doc_filename)

# Now add back the mime prefixes to the right outputs so they aren't rendered
# until called from the role/directive
for out in replace_mime:
out["data"] = {
f"{GLUE_PREFIX}{key}": val for key, val in out["data"].items()
}

# Update our glue key list with new ones defined in this page
new_keys = find_all_keys(
ntbk,
keys=document.settings.env.glue_data,
path=str(path_doc),
logger=SPHINX_LOGGER,
)
document.settings.env.glue_data.update(new_keys)

# render the Markdown AST to docutils AST
renderer = SphinxNBRenderer(
parse_context=parse_context, document=document, current_node=None
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
"pytest-cov",
"pytest-regressions",
"beautifulsoup4",
"ipython",
],
"rtd": [
"sphinxcontrib-bibtex",
Expand Down

0 comments on commit b46c2f1

Please sign in to comment.