Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new clean, convert, export commands #30

Merged
merged 25 commits into from
Jul 6, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
eacd22b
Add new clean, convert, export commands
jayqi Jun 25, 2020
deb0841
Rename autoclean to just clean
jayqi Jun 25, 2020
f3094b1
Fix unit tests
jayqi Jun 26, 2020
e25d1ac
Rename test scripts
jayqi Jun 26, 2020
ffd9ddb
New approach to cleaning
jayqi Jun 26, 2020
e2edc58
Rename convert module to export
jayqi Jun 27, 2020
ae1344b
Simplify cleaning; fix pdf convert bug
jayqi Jun 27, 2020
0f2d2d1
Lots of tests
jayqi Jun 27, 2020
fcdfdfd
Test post_save cleaning
jayqi Jun 27, 2020
1674e44
cleared_argv tests
jayqi Jun 27, 2020
b1b05e1
Add handling of images for asciidoc and rst
jayqi Jun 27, 2020
00193b5
Updated notebook identification and script extension handling
jayqi Jun 27, 2020
a1c1eb4
Change get_extension to have consistent signature
jayqi Jun 27, 2020
6ef14ca
Documentation
jayqi Jun 28, 2020
43ecd34
Change default organize_by back to notebook. Will change separately
jayqi Jun 28, 2020
a7c79cf
Fix relative working directory bug
jayqi Jul 1, 2020
019902f
Test for working_directory context manager
jayqi Jul 1, 2020
78bd46b
Merge convert and export commands
jayqi Jul 2, 2020
4a15445
clean and export handling no notebooks
jayqi Jul 2, 2020
2d88554
Remove redundant test dir specification to allow for individual tests
jayqi Jul 2, 2020
5eef6a1
Use miniconda and install pandoc and texlive
jayqi Jul 2, 2020
5c5b4f5
Remove test for pdf
jayqi Jul 2, 2020
5095d2e
subprocess capture_output not available in python 3.6
jayqi Jul 2, 2020
1412c15
Fix change to test
jayqi Jul 2, 2020
a44bae0
rename to replace to work with Windows
jayqi Jul 2, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,6 @@ insert_final_newline = false

[Makefile]
indent_style = tab

[*.yml]
indent_size = 2
15 changes: 11 additions & 4 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,28 @@ jobs:

- uses: actions/checkout@v2

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
- name: Set up Python ${{ matrix.python-version }} with Miniconda
uses: goanpeca/setup-miniconda@v1
with:
python-version: ${{ matrix.python-version }}
channels: conda-forge

- name: Install dependencies
shell: bash -l {0}
run: |
which python
python --version
python -m pip install --upgrade pip
pip install -r requirements-dev.txt
python -m pip install -r requirements-dev.txt
conda install pandoc

- name: Lint package
shell: bash -l {0}
run: |
make lint

- name: Run tests
shell: bash -l {0}
run: |
make test

Expand All @@ -44,4 +51,4 @@ jobs:
with:
file: ./coverage.xml
fail_ci_if_error: true
if: ${{ matrix.os == 'ubuntu-latest' && matrix.python-version == 3.8 }}
if: matrix.os == 'ubuntu-latest' && matrix.python-version == 3.8
9 changes: 3 additions & 6 deletions nbautoexport/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
from nbautoexport.nbautoexport import post_save
from nbautoexport._version import get_versions
from nbautoexport.export import post_save
from nbautoexport.utils import __version__

__all__ = [post_save]

__version__ = get_versions()["version"]
del get_versions
# __all__ must list public names as strings; putting the objects themselves here makes
# `from nbautoexport import *` fail with a TypeError.
__all__ = ["post_save", "__version__"]
3 changes: 3 additions & 0 deletions nbautoexport/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Entry point for `python -m nbautoexport`: imports the CLI `app` object and invokes it.
from nbautoexport.nbautoexport import app

# prog_name is handed to the app; presumably it is what usage/help output displays as the
# program name for this invocation style — confirm against the CLI framework in use.
app(prog_name="python -m nbautoexport")
116 changes: 116 additions & 0 deletions nbautoexport/clean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
from typing import Iterable, List
from pathlib import Path

from nbconvert.exporters import get_exporter

from nbautoexport.utils import find_notebooks, JupyterNotebook
from nbautoexport.sentinel import (
ExportFormat,
NbAutoexportConfig,
OrganizeBy,
SAVE_PROGRESS_INDICATOR_FILE,
)

# Export formats for which this module also looks for a "<notebook name>_files" directory
# next to the exported file (see notebook_exports_generator).
FORMATS_WITH_IMAGE_DIR = [
ExportFormat.asciidoc,
ExportFormat.latex,
ExportFormat.markdown,
ExportFormat.rst,
]


def get_extension(notebook: JupyterNotebook, export_format: ExportFormat) -> str:
    """Return the file extension an export of ``notebook`` in ``export_format`` will have.

    Args:
        notebook (JupyterNotebook): notebook the export would be produced from
        export_format (ExportFormat): export format, or a value accepted by ``ExportFormat(...)``

    Returns:
        str: file extension, e.g., '.py'
    """
    fmt = ExportFormat(export_format)

    # The script extension depends on the notebook itself, so the notebook is asked directly.
    if fmt == ExportFormat.script:
        return notebook.get_script_extension()

    extension = get_exporter(fmt.value)().file_extension

    # Notebook-to-notebook exports get an extra ".nbconvert" infix ahead of the extension.
    return f".nbconvert{extension}" if fmt == ExportFormat.notebook else extension


def notebook_exports_generator(
    notebook: JupyterNotebook, export_format: ExportFormat, organize_by: OrganizeBy
) -> Iterable[Path]:
    """Yield the paths expected from exporting one notebook to one format.

    Paths are yielded in this order: the export subfolder, the exported file inside it and,
    for formats in FORMATS_WITH_IMAGE_DIR whose "<notebook name>_files" directory currently
    exists on disk, that directory followed by each of its immediate entries.

    Args:
        notebook (JupyterNotebook): notebook to get export paths for
        export_format (ExportFormat): export format
        organize_by (OrganizeBy): type of subfolder approach

    Yields:
        Path: expected export paths given notebook and configuration options

    Raises:
        ValueError: if organize_by is not a recognized option. As this is a generator, the
            error surfaces when iteration starts rather than when the function is called.
    """
    if organize_by == OrganizeBy.notebook:
        subfolder = notebook.path.parent / notebook.name
    elif organize_by == OrganizeBy.extension:
        subfolder = notebook.path.parent / export_format.value
    else:
        # Fail with a clear message instead of an UnboundLocalError on `subfolder` below.
        raise ValueError(f"Unsupported organize_by option: {organize_by!r}")

    yield subfolder
    yield subfolder / f"{notebook.name}{get_extension(notebook, export_format)}"

    if export_format in FORMATS_WITH_IMAGE_DIR:
        image_dir = subfolder / f"{notebook.name}_files"
        # The assets directory is only reported when it is actually present on disk.
        if image_dir.exists():
            yield image_dir
            yield from image_dir.iterdir()


def get_expected_exports(
    notebooks: Iterable[JupyterNotebook], config: NbAutoexportConfig
) -> List[Path]:
    """Collect every path nbautoexport is expected to produce for the given notebooks.

    Each notebook is combined with each format in ``config.export_formats``; the paths
    yielded by ``notebook_exports_generator`` for those pairs are de-duplicated and sorted.

    Args:
        notebooks (Iterable[JupyterNotebook]): iterable of notebooks
        config (NbAutoexportConfig): configuration providing ``export_formats`` and
            ``organize_by``

    Returns:
        List[Path]: sorted list of expected nbautoexport output paths
    """
    unique_paths = {
        export_path
        for notebook in notebooks
        for export_format in config.export_formats
        for export_path in notebook_exports_generator(
            notebook, export_format, config.organize_by
        )
    }
    return sorted(unique_paths)


def find_files_to_clean(directory: Path, config: NbAutoexportConfig) -> List[Path]:
    """List the files under ``directory`` that are not accounted for and can be cleaned up.

    A file is kept when it is a notebook found by ``find_notebooks``, an expected export for
    those notebooks under ``config``, a file directly inside ``.ipynb_checkpoints``, or the
    nbautoexport sentinel file. Every other file found recursively is returned.

    Args:
        directory (Path): notebooks directory to find files to clean up
        config (NbAutoexportConfig): configuration used to compute the expected exports

    Returns:
        List[Path]: sorted list of files to clean up
    """
    notebooks: List[JupyterNotebook] = find_notebooks(directory)

    # Everything that must survive cleaning goes into one set.
    keep = set(get_expected_exports(notebooks, config))
    keep.update(nb.path for nb in notebooks)
    keep.update(p for p in directory.glob(".ipynb_checkpoints/*") if p.is_file())
    keep.add(directory / SAVE_PROGRESS_INDICATOR_FILE)

    all_files = {p for p in directory.glob("**/*") if p.is_file()}
    return sorted(all_files - keep)
122 changes: 122 additions & 0 deletions nbautoexport/export.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
from pathlib import Path
import re

from nbconvert.nbconvertapp import NbConvertApp
from nbconvert.postprocessors.base import PostProcessorBase
from notebook.services.contents.filemanager import FileContentsManager

from nbautoexport.clean import find_files_to_clean, FORMATS_WITH_IMAGE_DIR
from nbautoexport.sentinel import (
ExportFormat,
NbAutoexportConfig,
SAVE_PROGRESS_INDICATOR_FILE,
)
from nbautoexport.utils import cleared_argv


class CopyToSubfolderPostProcessor(PostProcessorBase):
    """nbconvert post-processor that relocates a converted file into a subfolder.

    The converted file is rewritten (or, for PDF, moved) into ``<output dir>/<subfolder>``,
    any accompanying "<stem>_files" assets directory is moved alongside it for formats in
    FORMATS_WITH_IMAGE_DIR, and the original converted file is removed.
    """

    def __init__(self, subfolder: str, export_format: ExportFormat):
        """Store the destination subfolder name and the format being exported.

        Args:
            subfolder (str): name of the subfolder, relative to the converted file's
                directory; if None, postprocess does nothing
            export_format (ExportFormat): format of the file being post-processed
        """
        self.subfolder = subfolder
        self.export_format = export_format
        super().__init__()

    def postprocess(self, input: str):
        """Save converted file to a separate directory, removing cell numbers.

        Args:
            input (str): path to the converted file written by nbconvert
        """
        if self.subfolder is None:
            return

        input = Path(input)

        new_dir = input.parent / self.subfolder
        new_dir.mkdir(exist_ok=True)
        new_path = new_dir / input.name

        if self.export_format == ExportFormat.pdf:
            # Can't read pdf file as unicode, skip rest of postprocessing and just move it
            input.replace(new_path)
            return

        # Rewrite converted file to new path, removing cell numbers. The encoding is pinned
        # to UTF-8 so the result does not depend on the platform's locale default.
        with input.open("r", encoding="utf-8") as f:
            text = f.read()
        with new_path.open("w", encoding="utf-8") as f:
            f.write(re.sub(r"\n#\sIn\[(([0-9]+)|(\s))\]:\n{2}", "", text))

        # For some formats, we also need to move the assets directory, for stuff like images
        if self.export_format in FORMATS_WITH_IMAGE_DIR:
            assets_dir = input.parent / f"{input.stem}_files"
            if assets_dir.exists() and assets_dir.is_dir():
                new_assets_dir = new_dir / f"{input.stem}_files"
                new_assets_dir.mkdir(exist_ok=True)
                for asset in assets_dir.iterdir():
                    asset.replace(new_assets_dir / asset.name)
                # Every entry was moved out above, so the old directory can be removed.
                assets_dir.rmdir()

        # The rewritten copy lives in new_dir; drop the file nbconvert originally wrote.
        input.unlink()


def post_save(model: dict, os_path: str, contents_manager: FileContentsManager):
    """Jupyter post-save hook that exports a saved notebook with nbconvert into a subfolder.

    Nothing happens unless the saved file is a notebook and its directory contains the
    nbautoexport sentinel file. When the loaded configuration has ``clean`` set, unexpected
    files and then empty immediate subfolders of that directory are removed after exporting.

    The following arguments are standard for Jupyter post-save hooks. See [Jupyter Documentation](
    https://jupyter-notebook.readthedocs.io/en/stable/extending/savehooks.html).

    Args:
        model (dict): the model representing the file. See [Jupyter documentation](
            https://jupyter-notebook.readthedocs.io/en/stable/extending/contents.html#data-model).
        os_path (str): the filesystem path to the file just written
        contents_manager (FileContentsManager): FileContentsManager instance that hook is bound to
    """
    # Saves of anything other than a notebook are ignored.
    if model["type"] != "notebook":
        return

    # The sentinel file in the notebook's directory is what opts that directory in.
    notebook_path = Path(os_path)
    notebook_dir = notebook_path.parent
    sentinel = notebook_dir / SAVE_PROGRESS_INDICATOR_FILE
    if not sentinel.exists():
        return

    config = NbAutoexportConfig.parse_file(path=sentinel, content_type="application/json")
    export_notebook(notebook_path, config=config)

    if not config.clean:
        return

    # Delete files that are neither notebooks nor expected exports.
    for stale_file in find_files_to_clean(notebook_dir, config):
        stale_file.unlink()

    # Delete immediate subfolders that ended up empty.
    for child in notebook_dir.iterdir():
        if child.is_dir() and not any(child.iterdir()):
            child.rmdir()


def export_notebook(notebook_path: Path, config: NbAutoexportConfig):
    """Convert one notebook with nbconvert, once per format listed in the configuration.

    Args:
        notebook_path (Path): path to notebook to export with nbconvert
        config (NbAutoexportConfig): configuration providing ``export_formats`` and
            ``organize_by``
    """
    # NOTE(review): cleared_argv is a helper from nbautoexport.utils; presumably it hides the
    # host process's command-line arguments from NbConvertApp — confirm against its definition.
    with cleared_argv():
        nbconvert_app = NbConvertApp()

        for fmt in config.export_formats:
            # Subfolder is named after the notebook or after the export format.
            if config.organize_by == "notebook":
                subfolder = notebook_path.stem
            elif config.organize_by == "extension":
                subfolder = fmt.value

            mover = CopyToSubfolderPostProcessor(subfolder=subfolder, export_format=fmt)
            nbconvert_app.postprocessor = mover
            nbconvert_app.export_format = fmt.value
            nbconvert_app.initialize()
            # The notebook list is assigned after initialize(), as in the original ordering.
            nbconvert_app.notebooks = [str(notebook_path)]
            nbconvert_app.convert_notebooks()
90 changes: 90 additions & 0 deletions nbautoexport/jupyter_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
from inspect import getsourcelines
from pathlib import Path
from pkg_resources import parse_version
import re
import textwrap

from jupyter_core.paths import jupyter_config_dir
from traitlets.config.loader import Config

from nbautoexport.utils import __version__, logger


# Template for the code block that gets spliced into the user's Jupyter config file.
#
# This function is not only callable: its source lines between the `def` line and the final
# `pass` are extracted with getsourcelines, run through str.format(version=...), dedented, and
# stored as post_save_hook_initialize_block (defined right after this function), which
# install_post_save_hook then writes into the config file. Consequences for editing the body:
#   - every line of the body, comments included, becomes part of the written config text;
#   - "{version}" in the opening marker is a str.format placeholder, so any other curly brace
#     added to the body would be interpreted by str.format as well;
#   - the ">>>" / "<<<" marker comments are what block_regex and version_regex search for.
#
# When executed, the body registers nbautoexport's post_save as c.FileContentsManager's
# post_save_hook; if a callable hook is already set, a wrapper calls the old hook first and
# post_save second. Any exception raised while doing so is swallowed.
def initialize_post_save_hook(c: Config):
# >>> nbautoexport initialize, version=[{version}] >>>
try:
from nbautoexport import post_save

if callable(c.FileContentsManager.post_save_hook):
old_post_save = c.FileContentsManager.post_save_hook

def _post_save(model, os_path, contents_manager):
old_post_save(model=model, os_path=os_path, contents_manager=contents_manager)
post_save(model=model, os_path=os_path, contents_manager=contents_manager)

c.FileContentsManager.post_save_hook = _post_save
else:
c.FileContentsManager.post_save_hook = post_save
except Exception:
pass
# <<< nbautoexport initialize <<<
pass # need this line for above comment to be included in function source


# Text of the hook block to write into the Jupyter config: the source lines of
# initialize_post_save_hook minus its first (`def`) and last (`pass`) lines, with "{version}"
# filled in from __version__ and the common leading indentation removed.
post_save_hook_initialize_block = textwrap.dedent(
"".join(getsourcelines(initialize_post_save_hook)[0][1:-1]).format(version=__version__)
)

# Matches from the opening ">>>" marker through the closing "<<<" marker, plus one optional
# trailing newline. The ".*" is greedy and spans lines, so a match runs to the LAST closing
# marker present in the text.
block_regex = re.compile(
r"# >>> nbautoexport initialize.*# <<< nbautoexport initialize <<<\n?",
re.DOTALL, # dot matches newline
)
# Matches only the version text between "version=[" and "] >>>" in the opening marker; the
# lookbehind/lookahead keep the surrounding marker text out of the match.
version_regex = re.compile(r"(?<=# >>> nbautoexport initialize, version=\[).*(?=\] >>>)")


def install_post_save_hook():
    """Splice the nbautoexport post-save hook block into the Jupyter notebook config file.

    The target is ``jupyter_notebook_config.py`` inside ``jupyter_config_dir()``:

    - if the file does not exist, it is created containing only the hook block;
    - if it exists without a hook block, the block is appended;
    - if it contains a hook block that is unversioned or older than the installed
      nbautoexport version, the block is replaced in place;
    - otherwise the file is left untouched.
    """
    config_dir = jupyter_config_dir()
    config_path = (Path(config_dir) / "jupyter_notebook_config.py").expanduser().resolve()

    if not config_path.exists():
        logger.debug(f"No existing Jupyter configuration detected at {config_path}. Creating...")
        config_path.parent.mkdir(exist_ok=True, parents=True)
        with config_path.open("w") as fp:
            fp.write(post_save_hook_initialize_block)
        logger.info("nbautoexport post-save hook installed.")
        return

    # If config exists, check for existing nbautoexport initialize block and install as appropriate
    logger.debug(f"Detected existing Jupyter configuration at {config_path}")

    with config_path.open("r") as fp:
        config = fp.read()

    if block_regex.search(config):
        logger.debug("Detected existing nbautoexport post-save hook.")

        version_match = version_regex.search(config)
        if version_match:
            existing_version = version_match.group()
            logger.debug(f"Existing post-save hook is version {existing_version}")
        else:
            existing_version = ""
            logger.debug("Existing post-save hook predates versioning.")

        # A block without a version is always outdated. Decide that without parsing, because
        # an empty string is not a valid version and newer parsers reject it with an error.
        needs_update = not existing_version or (
            parse_version(existing_version) < parse_version(__version__)
        )

        if needs_update:
            logger.info(f"Updating nbautoexport post-save hook with version {__version__}...")
            with config_path.open("w") as fp:
                # Open as w replaces existing file. We're replacing entire config.
                # A callable replacement inserts the block verbatim, with no processing of
                # backslash escapes or group references in the block text.
                fp.write(block_regex.sub(lambda _match: post_save_hook_initialize_block, config))
        else:
            logger.debug("No changes made.")
            return
    else:
        logger.info("Installing post-save hook.")
        with config_path.open("a") as fp:
            # Open as a just appends. We append block at the end of existing file.
            fp.write("\n" + post_save_hook_initialize_block)

    logger.info("nbautoexport post-save hook installed.")