Skip to content

Commit

Permalink
Add simple CLI with recursive notebook discovery
Browse files Browse the repository at this point in the history
  • Loading branch information
jbn committed May 3, 2017
1 parent 8482fa5 commit ea4b984
Show file tree
Hide file tree
Showing 13 changed files with 536 additions and 16 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
dist/
nbmerge.egg-info/

build/
17 changes: 17 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,28 @@ Installation
Usage
=====

For the usage as originally specified by
`@fperez <https://github.com/fperez>`__'s gist,

.. code:: sh
nbmerge file_1.ipynb file_2.ipynb file_3.ipynb > merged.ipynb
Alternatively, nbmerge can recursively collect all notebooks in the current
directory and below. After collection, it sorts them
lexicographically. You can use a regular expression as a file name
predicate. All ``.ipynb_checkpoints`` directories are automatically ignored. You
can also use the ``-i`` option to ignore any notebook prefixed with an underscore
(think pseudo-private in Python).

For example, the following command collects all notebooks in your project
that have the word ``intro`` in the file name and saves the result to a merged
file named ``_merged.ipynb``,

.. code:: sh
nbmerge --recursive -i -f ".*intro.*" -o _merged.ipynb
Lineage
=======

Expand Down
121 changes: 107 additions & 14 deletions nbmerge/__init__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
from __future__ import print_function
import codecs
import argparse
import io
import re
import os
import sys

from nbformat import read as read_notebook
from nbformat import write as write_notebook

# See:
# - stackoverflow.com/a/1169209
# - github.com/kynan/nbstripout/commit/8e26f4df317fde8b935df8e4930b32c74f834cf9
sys.stdout = codecs.getwriter('utf8')(sys.stdout)

__title__ = "nbmerge"
__description__ = "A tool to merge / concatenate Jupyter (IPython) notebooks"
Expand All @@ -25,7 +23,7 @@
__email__ = "jbn@abreka.com"


def merge_notebooks(file_paths):
def merge_notebooks(file_paths, verbose=False):
"""
Merge the given notebooks into one notebook.
Expand All @@ -38,11 +36,21 @@ def merge_notebooks(file_paths):
but the first notebook has a key path of metadata.ns.y, the second
data's entry is overwritten. It does not recursively descend into
the dictionaries.
:param file_paths: the ordered file paths to the notebooks for
concatenation
:param verbose: if True, print message for each notebook when processing
:return: the merged notebook
"""
merged, metadata = None, []

if verbose:
print("Merging notebooks...")

for path in file_paths:
with io.open(path, 'r', encoding='utf-8') as fp:
if verbose:
print("\tReading `{}`".format(path))
nb = read_notebook(fp, as_version=4)

metadata.append(nb.metadata)
Expand All @@ -52,6 +60,9 @@ def merge_notebooks(file_paths):
else:
merged.cells.extend(nb.cells)

if verbose:
print("Merging metadata in reverse order...")

merged_metadata = {}
for meta in reversed(metadata):
merged_metadata.update(meta)
Expand All @@ -60,12 +71,94 @@ def merge_notebooks(file_paths):
return merged


def recursive_find(ignore_underscored, filter_re):
    """
    Find all notebooks relative to the cwd which match the filtering criteria.

    Every ``.ipynb_checkpoints`` directory met during the walk is pruned, so
    neither its files nor anything nested below it is collected.

    :param ignore_underscored: filter out all notebooks which begin with
        an underscore prefix, irrespective of the filter regexp
    :param filter_re: a regular expression for file name acceptance. It is
        applied with ``match``, i.e. anchored at the start of the file name.
        A falsy value (``None`` or ``""``) accepts every notebook.
    :return: lexicographically ordered list of notebook file paths
    """
    checkpoint_dir = ".ipynb_checkpoints"
    pattern = re.compile(filter_re or ".*")

    file_paths = []

    for dir_path, dir_names, file_names in os.walk(os.getcwd()):
        # Prune in place: os.walk (top-down) only descends into the names
        # left in dir_names, so checkpoint trees are never visited at all.
        dir_names[:] = [name for name in dir_names if name != checkpoint_dir]

        # Pruning covers every directory below the cwd; this check is only
        # reachable when the cwd itself is a checkpoint directory.
        if os.path.basename(dir_path) == checkpoint_dir:
            continue

        for file_name in file_names:
            if not file_name.endswith(".ipynb"):
                continue

            if ignore_underscored and file_name.startswith('_'):
                continue

            if not pattern.match(file_name):
                continue

            file_paths.append(os.path.join(dir_path, file_name))

    return sorted(file_paths)  # For lexicographic sorting


def parse_plan(args=None):
    """
    Parse the command line arguments and produce an execution plan.

    Explicitly listed files always come first, in the order given; when
    ``--recursive`` is set, the notebooks discovered by ``recursive_find``
    are appended after them.

    :param args: the argument list to parse; when None, argparse reads the
        process command line
    :return: a dict with the keys ``notebooks`` (ordered list of notebook
        paths), ``output_file`` (the ``-o`` value, or None when not given)
        and ``verbose`` (bool)

    Exits with status 1 when an explicitly listed file does not exist.
    """
    # The text must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, which would put the sentence
    # into the usage line in place of the program name.
    parser = argparse.ArgumentParser(
        description="Merge a set of notebooks into one.")

    parser.add_argument("files",
                        help="Paths to files to merge",
                        nargs="*")

    parser.add_argument("-o", "--output",
                        help="Write to the specified file")

    parser.add_argument("-f", "--filter-re",
                        help="Regexp for filename acceptance")
    parser.add_argument("-i", "--ignore-underscored",
                        help="Ignore notebooks with underscore prefix",
                        action="store_true")
    parser.add_argument("-r", "--recursive",
                        help="Merge all notebooks in subdirectories",
                        action="store_true")
    parser.add_argument("-v", "--verbose",
                        help="Print progress as processing",
                        action="store_true")

    args = parser.parse_args(args)

    file_paths = args.files[:]
    for file_path in file_paths:
        if not os.path.exists(file_path):
            # Report on stderr: stdout may be carrying the merged notebook.
            sys.stderr.write(
                "Notebook `{}` does not exist\n".format(file_path))
            sys.exit(1)

    if args.recursive:
        # If you specify any files, they are added first, in order.
        # This is useful for a header notebook of some sort.
        file_paths.extend(recursive_find(args.ignore_underscored,
                                         args.filter_re))

    return {'notebooks': file_paths,
            'output_file': args.output,
            'verbose': args.verbose}


def main():
    """
    Command line entry point.

    Builds the execution plan from the command line, merges the planned
    notebooks and writes the result to the ``-o`` file, or to stdout when
    no output file was given.
    """
    plan = parse_plan()
    output_file = plan['output_file']

    # merge_notebooks reports progress with print(), i.e. on stdout. Honour
    # --verbose only when the notebook goes to a file, so that notebook JSON
    # written to stdout is never interleaved with progress messages.
    verbose = plan['verbose'] and output_file is not None

    nb = merge_notebooks(plan['notebooks'], verbose=verbose)

    if output_file is None:
        # NOTE: stdout is used as-is. Wrapping it in a UTF-8 codecs writer
        # was the earlier approach; for background see:
        # - http://stackoverflow.com/a/1169209
        # - http://github.com/kynan/nbstripout/commit/8e26f4df
        write_notebook(nb, sys.stdout)
    else:
        with io.open(output_file, 'w', encoding='utf8') as fp:
            write_notebook(nb, fp)
42 changes: 42 additions & 0 deletions tests/fixtures/.ipynb_checkpoints/1_Intro-checkpoint.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Notebook 1"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Some markdown."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
},
"test_meta": {
"title": "Page 1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
101 changes: 101 additions & 0 deletions tests/fixtures/.ipynb_checkpoints/2_Middle-checkpoint.ipynb

Large diffs are not rendered by default.

45 changes: 45 additions & 0 deletions tests/fixtures/.ipynb_checkpoints/3_Conclusion-checkpoint.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Notebook 3"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"The end."
]
}
],
"metadata": {
"final_answer": 42,
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
},
"test_meta": {
"title": "Page 3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
41 changes: 41 additions & 0 deletions tests/fixtures/.ipynb_checkpoints/Header-checkpoint.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# In the beginning, there was House Music"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Hidden, Normally"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Empty file.

0 comments on commit ea4b984

Please sign in to comment.