Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor textbook generator to check redirects #27

Merged
merged 4 commits into from Dec 10, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
43 changes: 39 additions & 4 deletions scripts/generate_book.py
Expand Up @@ -8,6 +8,7 @@
from tqdm import tqdm
import numpy as np
from glob import glob
from uuid import uuid4
import argparse

DESCRIPTION = ("Convert a collection of Jupyter Notebooks into Jekyll "
Expand All @@ -26,6 +27,8 @@
parser.add_argument("--path-toc", default=None, help="Path to the Table of Contents YAML file")
parser.add_argument("--overwrite", action='store_true', help="Overwrite md files if they already exist.")
parser.add_argument("--execute", action='store_true', help="Execute notebooks before converting to MD.")
parser.add_argument("--local-build", action='store_true',
help="Specify you are building site locally for later upload.")
parser.set_defaults(overwrite=False, execute=False)

# Defaults
Expand Down Expand Up @@ -75,6 +78,28 @@ def _copy_non_content_files():
sh.copy2(ifile, new_path)


def _case_sensitive_fs(path):
"""True when filesystem at `path` is case sensitive, False otherwise.

Checks this by attempting to write two files, one w/ upper case, one
with lower. If after this only one file exists, the system is case-insensitive.

Makes directory `path` if it does not exist.
"""
if not op.exists(path):
os.makedirs(path)
root = op.join(path, uuid4().hex)
fnames = [root + suffix for suffix in 'aA']
try:
for fname in fnames:
with open(fname, 'wt') as fobj:
fobj.write('text')
written = glob(root + '*')
finally:
for fname in written:
os.unlink(fname)
return len(written) == 2


if __name__ == '__main__':
###############################################################################
Expand Down Expand Up @@ -118,6 +143,7 @@ def _copy_non_content_files():

n_skipped_files = 0
n_built_files = 0
case_check = _case_sensitive_fs(BUILD_FOLDER) and args.local_build
print("Convert and copy notebook/md files...")
for ix_file, page in enumerate(tqdm(list(toc))):
url_page = page.get('url', None)
Expand Down Expand Up @@ -233,10 +259,19 @@ def _copy_non_content_files():
# Front-matter YAML
yaml_fm = []
yaml_fm += ['---']
yaml_fm += ['redirect_from:']
yaml_fm += [' - "{}"'.format(_prepare_url(url_page).replace('_', '-').lower())]
if ix_file == 0:
yaml_fm += [' - "/"']
# In case pre-existing links are sanitized
sanitized = url_page.lower().replace('_', '-')
if sanitized != url_page:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok one more annoying gotcha, here it would also fail if people used all lower-case names, but also used underscores. E.g.

if the page name is my_page.html

then the sanitized is my-page.html

then the sanitized != url_page is True and the case check will raise the error.

I think we could get around this w/ the following suggestion:

Suggested change
if sanitized != url_page:
if sanitized != url_page.replace('_', '-'):

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks for fixing these up @matthew-brett - sorry for the slow response but I was off at the neurips conference and just got back yesterday!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that's what the check is for on the next line - no? I mean, in that case, url_page.lower() != sanitized and it won't raise the error. Or am I not thinking straight?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ohhh you're right. In my example above, the first check (sanitized != url_page would be True, but the second check url_page.lower() == sanitized would be False so the error wouldn't be triggered...in that case I think this LGTM! Merging away, thanks for grinding through this one w/ me :-)

if case_check and url_page.lower() == sanitized:
raise RuntimeError(
'Redirect {} clashes with page {} for local build on '
'case-insensitive FS\n'.format(sanitized, url_page) +
'Rename source page to lower case or build on a case '
'sensitive FS, e.g. case-sensitive disk image on Mac')
yaml_fm += ['redirect_from:']
yaml_fm += [' - "{}"'.format(sanitized)]
if ix_file == 0:
yaml_fm += [' - "/"']
if path_url_page.endswith('.ipynb'):
interact_path = 'content/' + path_url_page.split('content/')[-1]
yaml_fm += ['interact_link: {}'.format(interact_path)]
Expand Down
3 changes: 2 additions & 1 deletion scripts/tests/test_build.py
Expand Up @@ -31,9 +31,10 @@ def is_not_in(lines, check):
if op.isdir(op.join(curdir, 'site', '_build')):
sh.rmtree(op.join(curdir, 'site', '_build'))

print("Building site for test suite...")
cmd = ["python", op.join(curdir, "..", "generate_book.py"),
"--site-root", op.join(curdir, "site"), "--path-template", op.join(curdir, "..", "templates", "jekyllmd.tpl")]
out = subprocess.call(cmd)
out = subprocess.check_call(cmd)

####################################################
# Check outputs
Expand Down
3 changes: 3 additions & 0 deletions scripts/tests/test_license.py
Expand Up @@ -10,6 +10,9 @@

def test_license():
# Not yes/no answers should error
if op.exists(op.join(curdir, "site", "content", "LICENSE.md")):
os.remove(op.join(curdir, "site", "content", "LICENSE.md"))

cmd_error = ["python", op.join(curdir, "..", "license.py"),
"--path", op.join(curdir, "site", "content"), "--use-license", "blah"]

Expand Down