
DOC: script to build single docstring page (pandas-dev#19840)
jorisvandenbossche committed Mar 1, 2018
1 parent 52559f5 commit c8859b5
Showing 4 changed files with 161 additions and 78 deletions.
doc/make.py (166 changes: 135 additions & 31 deletions)
@@ -14,40 +14,21 @@
import sys
import os
import shutil
import subprocess
# import subprocess
import argparse
from contextlib import contextmanager
import webbrowser
import jinja2

import pandas


DOC_PATH = os.path.dirname(os.path.abspath(__file__))
SOURCE_PATH = os.path.join(DOC_PATH, 'source')
BUILD_PATH = os.path.join(DOC_PATH, 'build')
BUILD_DIRS = ['doctrees', 'html', 'latex', 'plots', '_static', '_templates']


def _generate_index(include_api, single_doc=None):
"""Create index.rst file with the specified sections.
Parameters
----------
include_api : bool
Whether API documentation will be built.
single_doc : str or None
If provided, this single documentation page will be generated.
"""
if single_doc is not None:
single_doc = os.path.splitext(os.path.basename(single_doc))[0]
include_api = False

with open(os.path.join(SOURCE_PATH, 'index.rst.template')) as f:
t = jinja2.Template(f.read())

with open(os.path.join(SOURCE_PATH, 'index.rst'), 'w') as f:
f.write(t.render(include_api=include_api,
single_doc=single_doc))


@contextmanager
def _maybe_exclude_notebooks():
"""Skip building the notebooks if pandoc is not installed.
@@ -58,6 +39,7 @@ def _maybe_exclude_notebooks():
1. nbconvert isn't installed, or
2. nbconvert is installed, but pandoc isn't
"""
# TODO move to exclude_pattern
base = os.path.dirname(__file__)
notebooks = [os.path.join(base, 'source', nb)
for nb in ['style.ipynb']]
@@ -96,8 +78,110 @@ class DocBuilder:
All public methods of this class can be called as parameters of the
script.
"""
def __init__(self, num_jobs=1):
def __init__(self, num_jobs=1, include_api=True, single_doc=None):
self.num_jobs = num_jobs
self.include_api = include_api
self.single_doc = None
self.single_doc_type = None
if single_doc is not None:
self._process_single_doc(single_doc)
self.exclude_patterns = self._exclude_patterns

self._generate_index()
if self.single_doc_type == 'docstring':
self._run_os('sphinx-autogen', '-o',
'source/generated_single', 'source/index.rst')

@property
def _exclude_patterns(self):
"""Docs source files that will be excluded from building."""
# TODO move maybe_exclude_notebooks here
if self.single_doc is not None:
rst_files = [f for f in os.listdir(SOURCE_PATH)
if ((f.endswith('.rst') or f.endswith('.ipynb'))
and (f != 'index.rst')
and (f != '{0}.rst'.format(self.single_doc)))]
if self.single_doc_type != 'api':
rst_files += ['generated/*.rst']
elif not self.include_api:
rst_files = ['api.rst', 'generated/*.rst']
else:
rst_files = ['generated_single/*.rst']

exclude_patterns = ','.join(
'{!r}'.format(i) for i in ['**.ipynb_checkpoints'] + rst_files)

return exclude_patterns

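# Illustrative sketch (not part of this commit): the property above boils
# down to excluding every source page except index.rst and the selected one.
# Standalone approximation, assuming a pandas checkout as the working
# directory and a single page named "indexing":
import os

SOURCE_PATH = 'doc/source'
single_doc = 'indexing'
rst_files = [f for f in os.listdir(SOURCE_PATH)
             if f.endswith(('.rst', '.ipynb'))
             and f not in ('index.rst', single_doc + '.rst')]
rst_files += ['generated/*.rst']   # also skip the autogenerated API stubs
exclude = ','.join('{!r}'.format(f)
                   for f in ['**.ipynb_checkpoints'] + rst_files)
print(exclude)   # handed to sphinx-build as -Dexclude_patterns=...
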
def _process_single_doc(self, single_doc):
"""Extract self.single_doc (base name) and self.single_doc_type from
passed single_doc kwarg.
"""
self.include_api = False

if single_doc == 'api.rst':
self.single_doc_type = 'api'
self.single_doc = 'api'
elif os.path.exists(os.path.join(SOURCE_PATH, single_doc)):
self.single_doc_type = 'rst'
self.single_doc = os.path.splitext(os.path.basename(single_doc))[0]
elif os.path.exists(
os.path.join(SOURCE_PATH, '{}.rst'.format(single_doc))):
self.single_doc_type = 'rst'
self.single_doc = single_doc
elif single_doc is not None:
try:
obj = pandas
for name in single_doc.split('.'):
obj = getattr(obj, name)
except AttributeError:
raise ValueError('Single document not understood, it should '
'be a file in doc/source/*.rst (e.g. '
'"contributing.rst" or a pandas function or '
'method (e.g. "pandas.DataFrame.head")')
else:
self.single_doc_type = 'docstring'
if single_doc.startswith('pandas.'):
self.single_doc = single_doc[len('pandas.'):]
else:
self.single_doc = single_doc

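# Illustrative sketch (not part of this commit): a few representative
# --single values and the (single_doc_type, single_doc) pair the method
# above resolves them to.
examples = {
    'api.rst':          ('api', 'api'),
    'contributing.rst': ('rst', 'contributing'),
    'indexing':         ('rst', 'indexing'),
    'DataFrame.head':   ('docstring', 'DataFrame.head'),
}
for arg, (doc_type, name) in examples.items():
    print('{!r:20} -> single_doc_type={!r}, single_doc={!r}'.format(
        arg, doc_type, name))
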
def _copy_generated_docstring(self):
"""Copy existing generated (from api.rst) docstring page because
this is more correct in certain cases (where a custom autodoc
template is used).
"""
fname = os.path.join(SOURCE_PATH, 'generated',
'pandas.{}.rst'.format(self.single_doc))
temp_dir = os.path.join(SOURCE_PATH, 'generated_single')

try:
os.makedirs(temp_dir)
except OSError:
pass

if os.path.exists(fname):
try:
# copying to make sure sphinx always thinks it is new
# and needs to be re-generated (to pick source code changes)
shutil.copy(fname, temp_dir)
except: # noqa
pass

def _generate_index(self):
"""Create index.rst file with the specified sections."""
if self.single_doc_type == 'docstring':
self._copy_generated_docstring()

with open(os.path.join(SOURCE_PATH, 'index.rst.template')) as f:
t = jinja2.Template(f.read())

with open(os.path.join(SOURCE_PATH, 'index.rst'), 'w') as f:
f.write(t.render(include_api=self.include_api,
single_doc=self.single_doc,
single_doc_type=self.single_doc_type))

@staticmethod
def _create_build_structure():
@@ -121,7 +205,10 @@ def _run_os(*args):
--------
>>> DocBuilder()._run_os('python', '--version')
"""
subprocess.check_call(args, stderr=subprocess.STDOUT)
# TODO check_call should be more safe, but it fails with
# exclude patterns, needs investigation
# subprocess.check_call(args, stderr=subprocess.STDOUT)
os.system(' '.join(args))

def _sphinx_build(self, kind):
"""Call sphinx to build documentation.
@@ -142,11 +229,21 @@ def _sphinx_build(self, kind):
self._run_os('sphinx-build',
'-j{}'.format(self.num_jobs),
'-b{}'.format(kind),
'-d{}'.format(os.path.join(BUILD_PATH,
'doctrees')),
'-d{}'.format(os.path.join(BUILD_PATH, 'doctrees')),
'-Dexclude_patterns={}'.format(self.exclude_patterns),
SOURCE_PATH,
os.path.join(BUILD_PATH, kind))

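# Illustrative sketch (not part of this commit): the argument tuple that
# reaches _run_os for `python make.py --single indexing html` looks roughly
# like this (paths abbreviated, exclusion list truncated):
args = ('sphinx-build', '-j1', '-bhtml',
        '-ddoc/build/doctrees',
        "-Dexclude_patterns='**.ipynb_checkpoints','advanced.rst',...",
        'doc/source', 'doc/build/html')
print(' '.join(args))   # the exact string handed to os.system
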
def _open_browser(self):
base_url = os.path.join('file://', DOC_PATH, 'build', 'html')
if self.single_doc_type == 'docstring':
url = os.path.join(
base_url,
'generated_single', 'pandas.{}.html'.format(self.single_doc))
else:
url = os.path.join(base_url, '{}.html'.format(self.single_doc))
webbrowser.open(url, new=2)

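# Illustrative note (not part of this commit): for a docstring build of
# DataFrame.head the page opened above is the one written to
# doc/build/html/generated_single/pandas.DataFrame.head.html, while plain
# .rst pages open as doc/build/html/<name>.html.
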
def html(self):
"""Build HTML documentation."""
self._create_build_structure()
@@ -156,6 +253,11 @@ def html(self):
if os.path.exists(zip_fname):
os.remove(zip_fname)

if self.single_doc is not None:
self._open_browser()
shutil.rmtree(os.path.join(SOURCE_PATH, 'generated_single'),
ignore_errors=True)

def latex(self, force=False):
"""Build PDF documentation."""
self._create_build_structure()
@@ -222,8 +324,8 @@ def main():
metavar='FILENAME',
type=str,
default=None,
help=('filename of section to compile, '
'e.g. "indexing"'))
help=('filename of section or method name to '
'compile, e.g. "indexing", "DataFrame.join"'))
argparser.add_argument('--python-path',
type=str,
default=os.path.join(DOC_PATH, '..'),
@@ -235,8 +337,10 @@
args.command, ', '.join(cmds)))

os.environ['PYTHONPATH'] = args.python_path
_generate_index(not args.no_api, args.single)
getattr(DocBuilder(args.num_jobs), args.command)()

getattr(DocBuilder(args.num_jobs,
not args.no_api,
args.single), args.command)()

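# Illustrative sketch (not part of this commit): the dispatch above makes
# `python make.py --single DataFrame.head html` roughly equivalent to the
# following (include_api starts True because --no-api is absent and is then
# forced to False by _process_single_doc):
builder = DocBuilder(num_jobs=1, include_api=True,
                     single_doc='DataFrame.head')
builder.html()   # builds just that docstring page and opens it in a browser
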

if __name__ == '__main__':
doc/source/conf.py (41 changes: 4 additions & 37 deletions)
@@ -18,7 +18,6 @@
import importlib
import warnings

from pandas.compat import u, PY3

try:
raw_input # Python 2
@@ -86,38 +85,6 @@
if any(re.match("\s*api\s*", l) for l in index_rst_lines):
autosummary_generate = True

files_to_delete = []
for f in os.listdir(os.path.dirname(__file__)):
if (not f.endswith(('.ipynb', '.rst')) or
f.startswith('.') or os.path.basename(f) == 'index.rst'):
continue

_file_basename = os.path.splitext(f)[0]
_regex_to_match = "\s*{}\s*$".format(_file_basename)
if not any(re.match(_regex_to_match, line) for line in index_rst_lines):
files_to_delete.append(f)

if files_to_delete:
print("I'm about to DELETE the following:\n{}\n".format(
list(sorted(files_to_delete))))
sys.stdout.write("WARNING: I'd like to delete those "
"to speed up processing (yes/no)? ")
if PY3:
answer = input()
else:
answer = raw_input()

if answer.lower().strip() in ('y', 'yes'):
for f in files_to_delete:
f = os.path.join(os.path.join(os.path.dirname(__file__), f))
f = os.path.abspath(f)
try:
print("Deleting {}".format(f))
os.unlink(f)
except:
print("Error deleting {}".format(f))
pass

# Add any paths that contain templates here, relative to this directory.
templates_path = ['../_templates']

@@ -131,8 +98,8 @@
master_doc = 'index'

# General information about the project.
project = u('pandas')
copyright = u('2008-2014, the pandas development team')
project = u'pandas'
copyright = u'2008-2014, the pandas development team'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
@@ -343,8 +310,8 @@
# file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
('index', 'pandas.tex',
u('pandas: powerful Python data analysis toolkit'),
u('Wes McKinney\n\& PyData Development Team'), 'manual'),
u'pandas: powerful Python data analysis toolkit',
u'Wes McKinney\n\& PyData Development Team', 'manual'),
]

# The name of an image file (relative to this directory) to place at the top of
doc/source/contributing.rst (27 changes: 17 additions & 10 deletions)
@@ -171,7 +171,7 @@ We'll now kick off a three-step process:
# Create and activate the build environment
conda env create -f ci/environment-dev.yaml
conda activate pandas-dev
# or with older versions of Anaconda:
source activate pandas-dev
@@ -388,14 +388,11 @@ If you want to do a full clean build, do::
python make.py html

You can tell ``make.py`` to compile only a single section of the docs, greatly
reducing the turn-around time for checking your changes. You will be prompted to
delete ``.rst`` files that aren't required. This is okay because the prior
versions of these files can be checked out from git. However, you must make sure
not to commit the file deletions to your Git repository!
reducing the turn-around time for checking your changes.

::

#omit autosummary and API section
# omit autosummary and API section
python make.py clean
python make.py --no-api

@@ -404,10 +401,20 @@ not to commit the file deletions to your Git repository!
python make.py clean
python make.py --single indexing

For comparison, a full documentation build may take 10 minutes, a ``-no-api`` build
may take 3 minutes and a single section may take 15 seconds. Subsequent builds, which
only process portions you have changed, will be faster. Open the following file in a web
browser to see the full documentation you just built::
# compile the reference docs for a single function
python make.py clean
python make.py --single DataFrame.join

For comparison, a full documentation build may take 15 minutes, but a single
section may take 15 seconds. Subsequent builds, which only process portions
you have changed, will be faster.

You can also specify to use multiple cores to speed up the documentation build::

python make.py html --num-jobs 4

Open the following file in a web browser to see the full documentation you
just built::

pandas/docs/build/html/index.html

doc/source/index.rst.template (5 changes: 5 additions & 0 deletions)
@@ -106,8 +106,13 @@ Some other notes
See the package overview for more detail about what's in the library.


{% if single_doc_type == 'docstring' -%}
.. autosummary::
:toctree: generated_single/
{% else -%}
.. toctree::
:maxdepth: 4
{% endif %}

{% if single_doc -%}
{{ single_doc }}
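
To see what the template fragment above renders to for a docstring build, a
trimmed stand-in can be run through jinja2 (an illustrative sketch, not part
of the commit; the template string below only mirrors the fragment shown
above):

import jinja2

fragment = jinja2.Template("""
{% if single_doc_type == 'docstring' -%}
.. autosummary::
   :toctree: generated_single/
{% else -%}
.. toctree::
    :maxdepth: 4
{% endif %}

{% if single_doc -%}
    {{ single_doc }}
{% endif %}
""")
print(fragment.render(single_doc='DataFrame.head',
                      single_doc_type='docstring'))
# prints an autosummary block with DataFrame.head listed under the
# generated_single/ toctree; a full build instead gets a regular toctree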
