From c8859b57b891701f250fb05f2cc60d2e6cae2d6b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 1 Mar 2018 10:35:05 +0100 Subject: [PATCH] DOC: script to build single docstring page (#19840) --- doc/make.py | 166 +++++++++++++++++++++++++++------- doc/source/conf.py | 41 +-------- doc/source/contributing.rst | 27 ++++-- doc/source/index.rst.template | 5 + 4 files changed, 161 insertions(+), 78 deletions(-) diff --git a/doc/make.py b/doc/make.py index e3cb29aa3e086..2819a62347627 100755 --- a/doc/make.py +++ b/doc/make.py @@ -14,11 +14,14 @@ import sys import os import shutil -import subprocess +# import subprocess import argparse from contextlib import contextmanager +import webbrowser import jinja2 +import pandas + DOC_PATH = os.path.dirname(os.path.abspath(__file__)) SOURCE_PATH = os.path.join(DOC_PATH, 'source') @@ -26,28 +29,6 @@ BUILD_DIRS = ['doctrees', 'html', 'latex', 'plots', '_static', '_templates'] -def _generate_index(include_api, single_doc=None): - """Create index.rst file with the specified sections. - - Parameters - ---------- - include_api : bool - Whether API documentation will be built. - single_doc : str or None - If provided, this single documentation page will be generated. - """ - if single_doc is not None: - single_doc = os.path.splitext(os.path.basename(single_doc))[0] - include_api = False - - with open(os.path.join(SOURCE_PATH, 'index.rst.template')) as f: - t = jinja2.Template(f.read()) - - with open(os.path.join(SOURCE_PATH, 'index.rst'), 'w') as f: - f.write(t.render(include_api=include_api, - single_doc=single_doc)) - - @contextmanager def _maybe_exclude_notebooks(): """Skip building the notebooks if pandoc is not installed. @@ -58,6 +39,7 @@ def _maybe_exclude_notebooks(): 1. nbconvert isn't installed, or 2. 
nbconvert is installed, but pandoc isn't """ + # TODO move to exclude_pattern base = os.path.dirname(__file__) notebooks = [os.path.join(base, 'source', nb) for nb in ['style.ipynb']] @@ -96,8 +78,110 @@ class DocBuilder: All public methods of this class can be called as parameters of the script. """ - def __init__(self, num_jobs=1): + def __init__(self, num_jobs=1, include_api=True, single_doc=None): self.num_jobs = num_jobs + self.include_api = include_api + self.single_doc = None + self.single_doc_type = None + if single_doc is not None: + self._process_single_doc(single_doc) + self.exclude_patterns = self._exclude_patterns + + self._generate_index() + if self.single_doc_type == 'docstring': + self._run_os('sphinx-autogen', '-o', + 'source/generated_single', 'source/index.rst') + + @property + def _exclude_patterns(self): + """Docs source files that will be excluded from building.""" + # TODO move maybe_exclude_notebooks here + if self.single_doc is not None: + rst_files = [f for f in os.listdir(SOURCE_PATH) + if ((f.endswith('.rst') or f.endswith('.ipynb')) + and (f != 'index.rst') + and (f != '{0}.rst'.format(self.single_doc)))] + if self.single_doc_type != 'api': + rst_files += ['generated/*.rst'] + elif not self.include_api: + rst_files = ['api.rst', 'generated/*.rst'] + else: + rst_files = ['generated_single/*.rst'] + + exclude_patterns = ','.join( + '{!r}'.format(i) for i in ['**.ipynb_checkpoints'] + rst_files) + + return exclude_patterns + + def _process_single_doc(self, single_doc): + """Extract self.single_doc (base name) and self.single_doc_type from + passed single_doc kwarg. 
+ + """ + self.include_api = False + + if single_doc == 'api.rst': + self.single_doc_type = 'api' + self.single_doc = 'api' + elif os.path.exists(os.path.join(SOURCE_PATH, single_doc)): + self.single_doc_type = 'rst' + self.single_doc = os.path.splitext(os.path.basename(single_doc))[0] + elif os.path.exists( + os.path.join(SOURCE_PATH, '{}.rst'.format(single_doc))): + self.single_doc_type = 'rst' + self.single_doc = single_doc + elif single_doc is not None: + try: + obj = pandas + for name in single_doc.split('.'): + obj = getattr(obj, name) + except AttributeError: + raise ValueError('Single document not understood, it should ' + 'be a file in doc/source/*.rst (e.g. ' + '"contributing.rst" or a pandas function or ' + 'method (e.g. "pandas.DataFrame.head")') + else: + self.single_doc_type = 'docstring' + if single_doc.startswith('pandas.'): + self.single_doc = single_doc[len('pandas.'):] + else: + self.single_doc = single_doc + + def _copy_generated_docstring(self): + """Copy existing generated (from api.rst) docstring page because + this is more correct in certain cases (where a custom autodoc + template is used). 
+ + """ + fname = os.path.join(SOURCE_PATH, 'generated', + 'pandas.{}.rst'.format(self.single_doc)) + temp_dir = os.path.join(SOURCE_PATH, 'generated_single') + + try: + os.makedirs(temp_dir) + except OSError: + pass + + if os.path.exists(fname): + try: + # copying to make sure sphinx always thinks it is new + # and needs to be re-generated (to pick source code changes) + shutil.copy(fname, temp_dir) + except: # noqa + pass + + def _generate_index(self): + """Create index.rst file with the specified sections.""" + if self.single_doc_type == 'docstring': + self._copy_generated_docstring() + + with open(os.path.join(SOURCE_PATH, 'index.rst.template')) as f: + t = jinja2.Template(f.read()) + + with open(os.path.join(SOURCE_PATH, 'index.rst'), 'w') as f: + f.write(t.render(include_api=self.include_api, + single_doc=self.single_doc, + single_doc_type=self.single_doc_type)) @staticmethod def _create_build_structure(): @@ -121,7 +205,10 @@ def _run_os(*args): -------- >>> DocBuilder()._run_os('python', '--version') """ - subprocess.check_call(args, stderr=subprocess.STDOUT) + # TODO check_call should be more safe, but it fails with + # exclude patterns, needs investigation + # subprocess.check_call(args, stderr=subprocess.STDOUT) + os.system(' '.join(args)) def _sphinx_build(self, kind): """Call sphinx to build documentation. 
@@ -142,11 +229,21 @@ def _sphinx_build(self, kind): self._run_os('sphinx-build', '-j{}'.format(self.num_jobs), '-b{}'.format(kind), - '-d{}'.format(os.path.join(BUILD_PATH, - 'doctrees')), + '-d{}'.format(os.path.join(BUILD_PATH, 'doctrees')), + '-Dexclude_patterns={}'.format(self.exclude_patterns), SOURCE_PATH, os.path.join(BUILD_PATH, kind)) + def _open_browser(self): + base_url = os.path.join('file://', DOC_PATH, 'build', 'html') + if self.single_doc_type == 'docstring': + url = os.path.join( + base_url, + 'generated_single', 'pandas.{}.html'.format(self.single_doc)) + else: + url = os.path.join(base_url, '{}.html'.format(self.single_doc)) + webbrowser.open(url, new=2) + def html(self): """Build HTML documentation.""" self._create_build_structure() @@ -156,6 +253,11 @@ def html(self): if os.path.exists(zip_fname): os.remove(zip_fname) + if self.single_doc is not None: + self._open_browser() + shutil.rmtree(os.path.join(SOURCE_PATH, 'generated_single'), + ignore_errors=True) + def latex(self, force=False): """Build PDF documentation.""" self._create_build_structure() @@ -222,8 +324,8 @@ def main(): metavar='FILENAME', type=str, default=None, - help=('filename of section to compile, ' - 'e.g. "indexing"')) + help=('filename of section or method name to ' + 'compile, e.g. 
"indexing", "DataFrame.join"')) argparser.add_argument('--python-path', type=str, default=os.path.join(DOC_PATH, '..'), @@ -235,8 +337,10 @@ def main(): args.command, ', '.join(cmds))) os.environ['PYTHONPATH'] = args.python_path - _generate_index(not args.no_api, args.single) - getattr(DocBuilder(args.num_jobs), args.command)() + + getattr(DocBuilder(args.num_jobs, + not args.no_api, + args.single), args.command)() if __name__ == '__main__': diff --git a/doc/source/conf.py b/doc/source/conf.py index b5fbf096f2626..835127e5094e4 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -18,7 +18,6 @@ import importlib import warnings -from pandas.compat import u, PY3 try: raw_input # Python 2 @@ -86,38 +85,6 @@ if any(re.match("\s*api\s*", l) for l in index_rst_lines): autosummary_generate = True -files_to_delete = [] -for f in os.listdir(os.path.dirname(__file__)): - if (not f.endswith(('.ipynb', '.rst')) or - f.startswith('.') or os.path.basename(f) == 'index.rst'): - continue - - _file_basename = os.path.splitext(f)[0] - _regex_to_match = "\s*{}\s*$".format(_file_basename) - if not any(re.match(_regex_to_match, line) for line in index_rst_lines): - files_to_delete.append(f) - -if files_to_delete: - print("I'm about to DELETE the following:\n{}\n".format( - list(sorted(files_to_delete)))) - sys.stdout.write("WARNING: I'd like to delete those " - "to speed up processing (yes/no)? ") - if PY3: - answer = input() - else: - answer = raw_input() - - if answer.lower().strip() in ('y', 'yes'): - for f in files_to_delete: - f = os.path.join(os.path.join(os.path.dirname(__file__), f)) - f = os.path.abspath(f) - try: - print("Deleting {}".format(f)) - os.unlink(f) - except: - print("Error deleting {}".format(f)) - pass - # Add any paths that contain templates here, relative to this directory. templates_path = ['../_templates'] @@ -131,8 +98,8 @@ master_doc = 'index' # General information about the project. 
-project = u('pandas') -copyright = u('2008-2014, the pandas development team') +project = u'pandas' +copyright = u'2008-2014, the pandas development team' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -343,8 +310,8 @@ # file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ('index', 'pandas.tex', - u('pandas: powerful Python data analysis toolkit'), - u('Wes McKinney\n\& PyData Development Team'), 'manual'), + u'pandas: powerful Python data analysis toolkit', + u'Wes McKinney\n\& PyData Development Team', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 258ab874cafcf..e159af9958fde 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -171,7 +171,7 @@ We'll now kick off a three-step process: # Create and activate the build environment conda env create -f ci/environment-dev.yaml conda activate pandas-dev - + # or with older versions of Anaconda: source activate pandas-dev @@ -388,14 +388,11 @@ If you want to do a full clean build, do:: python make.py html You can tell ``make.py`` to compile only a single section of the docs, greatly -reducing the turn-around time for checking your changes. You will be prompted to -delete ``.rst`` files that aren't required. This is okay because the prior -versions of these files can be checked out from git. However, you must make sure -not to commit the file deletions to your Git repository! +reducing the turn-around time for checking your changes. :: - #omit autosummary and API section + # omit autosummary and API section python make.py clean python make.py --no-api @@ -404,10 +401,20 @@ not to commit the file deletions to your Git repository! 
python make.py clean python make.py --single indexing -For comparison, a full documentation build may take 10 minutes, a ``-no-api`` build -may take 3 minutes and a single section may take 15 seconds. Subsequent builds, which -only process portions you have changed, will be faster. Open the following file in a web -browser to see the full documentation you just built:: + # compile the reference docs for a single function + python make.py clean + python make.py --single DataFrame.join + +For comparison, a full documentation build may take 15 minutes, but a single +section may take 15 seconds. Subsequent builds, which only process portions +you have changed, will be faster. + +You can also use multiple cores to speed up the documentation build:: + + python make.py html --num-jobs 4 + +Open the following file in a web browser to see the full documentation you +just built:: pandas/docs/build/html/index.html diff --git a/doc/source/index.rst.template b/doc/source/index.rst.template index eff1227e98994..cb6cce5edaf79 100644 --- a/doc/source/index.rst.template +++ b/doc/source/index.rst.template @@ -106,8 +106,13 @@ Some other notes See the package overview for more detail about what's in the library. +{% if single_doc_type == 'docstring' -%} +.. autosummary:: + :toctree: generated_single/ +{% else -%} .. toctree:: :maxdepth: 4 +{% endif %} {% if single_doc -%} {{ single_doc }}