Move to markdown-it-py markdown parser implementation (#107)
This commit moves to the markdown-it-py markdown parser implementation, concurrently with myst-parser. Additionally:

- Add notebook render tests
- Add a simple solution for reporting the correct cell index/line number:
  Report the line number as <cell index>*10000 + <line number>. This is a simple solution that addresses #71 without requiring any complex overrides of the sphinx reporting machinery (see the sketch after this list).
- Make tests use the actual sphinx Application
- Re-write validation of which docs to execute/cache:
  Rather than having a global variable, we save the exclude paths in the sphinx env and use a separate function `is_valid_exec_file`. Tests were also added.
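
A minimal sketch of the cell-index/line-number encoding described above. The helper names and the standalone-script form are illustrative assumptions for this example, not part of the MyST-NB codebase:

    # Illustrative sketch of the <cell index>*10000 + <line number> scheme.
    # encode_location/decode_location are hypothetical helpers, not the MyST-NB API.
    CELL_STRIDE = 10000  # assumes no cell has 10000 or more source lines

    def encode_location(cell_index, line_in_cell):
        """Pack a cell index and an in-cell line number into one reportable integer."""
        return cell_index * CELL_STRIDE + line_in_cell

    def decode_location(reported_line):
        """Recover the (cell index, line-in-cell) pair from a reported line number."""
        return divmod(reported_line, CELL_STRIDE)

    # e.g. line 12 of cell 3 is reported as line 30012
    assert encode_location(3, 12) == 30012
    assert decode_location(30012) == (3, 12)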
chrisjsewell committed Apr 1, 2020 · 1 parent f07f225 · commit 1d20384
Showing 33 changed files with 918 additions and 625 deletions.
3 changes: 2 additions & 1 deletion .pre-commit-config.yaml
@@ -5,7 +5,8 @@ exclude: >
(?x)^(
\.vscode/settings\.json|
tests/commonmark/commonmark\.json|
.*\.xml
.*\.xml|
tests/.*\.txt
)$
repos:
1 change: 1 addition & 0 deletions .vscode/settings.json
@@ -14,4 +14,5 @@
"python.linting.flake8Enabled": true,
"python.linting.enabled": true,
"python.pythonPath": "/anaconda/envs/ebp/bin/python",
"python.dataScience.useNotebookEditor": false
}
4 changes: 4 additions & 0 deletions docs/Makefile
@@ -22,3 +22,7 @@ help:
# raise warnings to errors
html-strict:
@$(SPHINXBUILD) -b html -nW --keep-going "$(SOURCEDIR)" "$(BUILDDIR)/html" $(SPHINXOPTS) $(O)

# increase logging level to verbose
html-verbose:
@$(SPHINXBUILD) -b html -v "$(SOURCEDIR)" "$(BUILDDIR)/html" $(SPHINXOPTS) $(O)
31 changes: 27 additions & 4 deletions myst_nb/__init__.py
@@ -1,15 +1,17 @@
__version__ = "0.5.0a1"
__version__ = "0.6.0"

from pathlib import Path

from docutils import nodes
from myst_nb.cache import execution_cache
from sphinx.util import logging

from jupyter_sphinx.ast import ( # noqa: F401
JupyterWidgetStateNode,
JupyterWidgetViewNode,
JupyterCell,
)

from pathlib import Path

from .cache import execution_cache
from .parser import (
NotebookParser,
CellNode,
@@ -22,12 +24,32 @@
from .nb_glue.domain import NbGlueDomain
from .nb_glue.transform import PasteNodesToDocutils

LOGGER = logging.getLogger(__name__)


def static_path(app):
static_path = Path(__file__).absolute().with_name("_static")
app.config.html_static_path.append(str(static_path))


def set_valid_execution_paths(app):
"""Set files excluded from execution, and valid file suffixes
Patterns given in execution_excludepatterns conf variable from executing.
"""
app.env.excluded_nb_exec_paths = {
str(path)
for pat in app.config["execution_excludepatterns"]
for path in Path().cwd().rglob(pat)
}
LOGGER.verbose("MyST-NB: Excluded Paths: %s", app.env.excluded_nb_exec_paths)
app.env.allowed_nb_exec_suffixes = {
suffix
for suffix, parser_type in app.config["source_suffix"].items()
if parser_type in ("ipynb",)
}


def update_togglebutton_classes(app, config):
to_add = [
".tag_hide_input div.cell_input",
@@ -98,6 +120,7 @@ def visit_element_html(self, node):
app.add_post_transform(CellOutputsToNodes)

app.connect("builder-inited", static_path)
app.connect("builder-inited", set_valid_execution_paths)
app.connect("env-get-outdated", execution_cache)
app.connect("config-inited", update_togglebutton_classes)
app.connect("env-updated", save_glue_cache)
96 changes: 42 additions & 54 deletions myst_nb/cache.py
@@ -12,67 +12,55 @@
from jupyter_cache import get_cache
from jupyter_cache.executors import load_executor

logger = logging.getLogger(__name__)
LOGGER = logging.getLogger(__name__)

filtered_nb_list = set() # TODO preferably this wouldn't be a global variable

def is_valid_exec_file(env, docname):
"""Check if the docname refers to a file that should be executed."""
doc_path = env.doc2path(docname)
if doc_path in env.excluded_nb_exec_paths:
return False
extension = os.path.splitext(doc_path)[1]
if extension not in env.allowed_nb_exec_suffixes:
return False
return True

def execution_cache(app, builder, added, changed, removed, path_cache=None):

def execution_cache(app, builder, added, changed, removed):
"""
If cacheing is required, stages and executes the added or modified notebooks,
If caching is required, stages and executes the added or modified notebooks,
and caches them for further use.
"""
jupyter_cache = False
exclude_files = []
file_list = added.union(
changed
) # all the added and changed notebooks should be operated on.

# all the added and changed notebooks should be operated on.
# note docnames are paths relative to the sphinx root folder, with no extensions
altered_docnames = added.union(changed)

if app.config["jupyter_execute_notebooks"] not in ["force", "auto", "cache", "off"]:
logger.error(
LOGGER.error(
"Conf jupyter_execute_notebooks can either be `force`, `auto`, `cache` or `off`" # noqa: E501
)
exit(1)

jupyter_cache = app.config["jupyter_cache"]

# excludes the file with patterns given in execution_excludepatterns
# conf variable from executing, like index.rst
for path in app.config["execution_excludepatterns"]:
exclude_files.extend(Path().cwd().rglob(path))

allowed_suffixes = {
suffix
for suffix, parser_type in app.config["source_suffix"].items()
if parser_type in ("ipynb",)
}

nb_list = [
p
for p in file_list
if os.path.splitext(app.env.doc2path(p))[1] in allowed_suffixes
exec_list = [
docname for docname in altered_docnames if is_valid_exec_file(app.env, docname)
]

for nb in nb_list:
exclude = False
for files in exclude_files:
if nb in str(files):
exclude = True
break
if not exclude:
filtered_nb_list.add(nb)
LOGGER.verbose("MyST-NB: Potential docnames to execute: %s", exec_list)

if "cache" in app.config["jupyter_execute_notebooks"]:
if jupyter_cache:
if os.path.isdir(jupyter_cache):
path_cache = jupyter_cache
else:
logger.error("Path to jupyter_cache is not a directory")
LOGGER.error(
f"Path to jupyter_cache is not a directory: {jupyter_cache}"
)
exit(1)
else:
path_cache = path_cache or Path(app.outdir).parent.joinpath(
".jupyter_cache"
)
path_cache = Path(app.outdir).parent.joinpath(".jupyter_cache")

app.env.path_cache = str(
path_cache
@@ -83,28 +71,29 @@ def execution_cache(app, builder, added, changed, removed, path_cache=None):
docpath = app.env.doc2path(path)
# there is an issue in sphinx doc2path, whereby if the path does not
# exist then it will be assigned the default source_suffix (usually .rst)
for suffix in allowed_suffixes:
# therefore, to be safe here, we run through all possible suffixes
for suffix in app.env.allowed_nb_exec_suffixes:
docpath = os.path.splitext(docpath)[0] + suffix
cache_base.discard_staged_notebook(docpath)

_stage_and_execute(app, filtered_nb_list, path_cache)
_stage_and_execute(app, exec_list, path_cache)

elif jupyter_cache:
logger.error(
LOGGER.error(
"If using conf jupyter_cache, please set jupyter_execute_notebooks" # noqa: E501
" to `cache`"
)
exit(1)

return file_list # TODO: can also compare timestamps for inputs outputs
return altered_docnames


def _stage_and_execute(app, nb_list, path_cache):
def _stage_and_execute(app, exec_list, path_cache):
pk_list = None

cache_base = get_cache(path_cache)

for nb in nb_list:
for nb in exec_list:
if "." in nb: # nb includes the path to notebook
source_path = nb
else:
@@ -115,9 +104,8 @@ def _stage_and_execute(app, nb_list, path_cache):
stage_record = cache_base.stage_notebook_file(source_path)
pk_list.append(stage_record.pk)

execute_staged_nb(
cache_base, pk_list
) # can leverage parallel execution implemented in jupyter-cache here
# can leverage parallel execution implemented in jupyter-cache here
execute_staged_nb(cache_base, pk_list)


def add_notebook_outputs(env, ntbk, file_path=None):
@@ -134,9 +122,7 @@ def add_notebook_outputs(env, ntbk, file_path=None):
reports_dir = str(dest_path) + "/reports"
path_cache = False

# checking if filename in execute_excludepattern
file_present = [env.docname in nb for nb in filtered_nb_list]
if True not in file_present:
if not is_valid_exec_file(env, env.docname):
return ntbk

if "cache" in env.config["jupyter_execute_notebooks"]:
@@ -148,10 +134,10 @@ def add_notebook_outputs(env, ntbk, file_path=None):
file_path, env.config["jupyter_execute_notebooks"]
)
if not has_outputs:
logger.info("Executing: {}".format(env.docname))
LOGGER.info("Executing: {}".format(env.docname))
ntbk = execute(ntbk)
else:
logger.info(
LOGGER.info(
"Did not execute {}. "
"Set jupyter_execute_notebooks to `force` to execute".format(
env.docname
@@ -184,7 +170,9 @@ def add_notebook_outputs(env, ntbk, file_path=None):
full_path
)

logger.error(message)
LOGGER.error(message)
else:
LOGGER.verbose("Merged cached outputs into %s", str(r_file_path))

return ntbk

@@ -194,9 +182,9 @@ def execute_staged_nb(cache_base, pk_list):
executing the staged notebook
"""
try:
executor = load_executor("basic", cache_base, logger=logger)
executor = load_executor("basic", cache_base, logger=LOGGER)
except ImportError as error:
logger.error(str(error))
LOGGER.error(str(error))
return 1
result = executor.run_and_cache(filter_pks=pk_list or None)
return result