DOC: Fix docbuild issues
Fix remaining docbuild issues, especially on Windows
Deduplicate some files
bashtage committed May 3, 2019
1 parent ae1b72c commit 6fcd769
Showing 7 changed files with 47 additions and 28 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -6,9 +6,9 @@ dist
#docs build and others
#generated #not yet? generated for dataset not rebuild
docs/source/generated
docs/source/datasets/generated
docs/source/dev/generated
docs/source/examples/generated
docs/source/datasets/generated
docs/source/examples/notebooks/generated
docs/source/datasets/statsmodels.datasets.*
docs/source/savefig
4 changes: 2 additions & 2 deletions docs/make.bat
@@ -47,8 +47,8 @@ echo %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
if errorlevel 1 exit /b 1

if "%1" == "html" (
echo xcopy /s source/examples/notebooks/generated/*.html %BUILDDIR%/html/examples/notebooks/generated
xcopy /s source/examples/notebooks/generated/*.html %BUILDDIR%/html/examples/notebooks/generated
echo xcopy /s /y source\examples\notebooks\generated\*.html %BUILDDIR%\html\examples\notebooks\generated
xcopy /s /y source\examples\notebooks\generated\*.html %BUILDDIR%\html\examples\notebooks\generated
echo python %TOOLSPATH%/%FOLDTOC% %BUILDDIR%/html/index.html
python %TOOLSPATH%/%FOLDTOC% %BUILDDIR%/html/index.html
echo python %TOOLSPATH%/%FOLDTOC% %BUILDDIR%/html/examples/index.html ../_static
3 changes: 2 additions & 1 deletion docs/source/conf.py
@@ -269,7 +269,8 @@
epub_title = u'statsmodels'
epub_author = u'Josef Perktold, Skipper Seabold'
epub_publisher = u'Josef Perktold, Skipper Seabold'
epub_copyright = u'2009-2018, Josef Perktold, Skipper Seabold, Jonathan Taylor, statsmodels-developers'
epub_copyright = u'2009-2019, Josef Perktold, Skipper Seabold, ' \
u'Jonathan Taylor, statsmodels-developers'

# The language of the text. It defaults to the language option
# or en if the language is not set.
14 changes: 7 additions & 7 deletions docs/source/large_data.rst
@@ -1,19 +1,19 @@
.. module:: statsmodels.base.distributed_estimation
.. currentmodule:: statsmodels.base.distributed_estimation

Working with "large"-ish data
=============================
Working with Large Data Sets
============================

Big data is something of a buzzword in the modern world. While statsmodels
works well with small and moderately-sized datasets that can be loaded in
works well with small and moderately-sized data sets that can be loaded in
memory--perhaps tens of thousands of observations--use cases exist with
millions of observations or more. Depending your usecase, statsmodels may or
millions of observations or more. Depending your use case, statsmodels may or
may not be a sufficient tool.

statsmodels and most of the software stack it is written on operates in
memory. Resultantly, building models on larger datasets can be challenging
memory. Resultantly, building models on larger data sets can be challenging
or even impractical. With that said, there are 2 general strategies for
building models on larger datasets with statsmodels.
building models on larger data sets with statsmodels.

Divide and Conquer - Distributing Jobs
--------------------------------------
@@ -38,7 +38,7 @@ A detailed example is available
Subsetting your data
--------------------

If your entire dataset is too large to store in memory, you might try storing
If your entire data set is too large to store in memory, you might try storing
it in a columnar container like `Apache Paruqet <https://parquet.apache.org/>`_
or `bcolz <http://bcolz.blosc.org/en/latest/>`_. Using the patsy formula
interface, statsmodels will use the `__getitem__` function (i.e. data['Item'])
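For context on the subsetting strategy described in large_data.rst, here is a minimal sketch of the idea (the file name "data.parquet" and the column names are hypothetical, not part of this commit): read only the columns a formula references from a columnar store, then fit through the formula interface.

```python
# Minimal sketch (hypothetical file and column names): pull only the columns the
# formula needs from a columnar store, then fit via the patsy formula interface.
import pandas as pd
import statsmodels.formula.api as smf

cols = ["y", "x1", "x2"]  # only the variables the formula references
df = pd.read_parquet("data.parquet", columns=cols)  # loads just these columns

res = smf.ols("y ~ x1 + x2", data=df).fit()
print(res.summary())
```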
31 changes: 24 additions & 7 deletions docs/source/optimization.rst
@@ -27,27 +27,34 @@ associated with that specific optimizer:

- ``newton`` - Newton-Raphson iteration. While not directly from scipy, we
consider it an optimizer because only the score and hessian are required.

tol : float
Relative error in params acceptable for convergence.

- ``nm`` - scipy's ``fmin_nm``

xtol : float
Relative error in params acceptable for convergence
ftol : float
Relative error in loglike(params) acceptable for
convergence
maxfun : int
Maximum number of function evaluations to make.

- ``bfgs`` - Broyden–Fletcher–Goldfarb–Shanno optimization, scipy's
``fmin_bfgs``.

gtol : float
Stop when norm of gradient is less than gtol.
norm : float
Order of norm (np.Inf is max, -np.Inf is min)
epsilon
If fprime is approximated, use this value for the step
size. Only relevant if LikelihoodModel.score is None.

- ``lbfgs`` - A more memory-efficient (limited memory) implementation of
``bfgs``. Scipy's ``fmin_l_bfgs_b``.

m : int
The maximum number of variable metric corrections used to
define the limited memory matrix. (The limited memory BFGS
@@ -74,7 +81,9 @@ associated with that specific optimizer:
approx_grad : bool
Whether to approximate the gradient numerically (in which
case func returns only the function value).

- ``cg`` - Conjugate gradient optimization. Scipy's ``fmin_cg``.

gtol : float
Stop when norm of gradient is less than gtol.
norm : float
@@ -83,7 +92,9 @@ associated with that specific optimizer:
If fprime is approximated, use this value for the step
size. Can be scalar or vector. Only relevant if
Likelihoodmodel.score is None.

- ``ncg`` - Newton conjugate gradient. Scipy's ``fmin_ncg``.

fhess_p : callable f'(x, \*args)
Function which computes the Hessian of f times an arbitrary
vector, p. Should only be supplied if
@@ -94,7 +105,9 @@ associated with that specific optimizer:
epsilon : float or ndarray
If fhess is approximated, use this value for the step size.
Only relevant if Likelihoodmodel.hessian is None.

- ``powell`` - Powell's method. Scipy's ``fmin_powell``.

xtol : float
Line-search error tolerance
ftol : float
@@ -104,8 +117,10 @@ associated with that specific optimizer:
Maximum number of function evaluations to make.
start_direc : ndarray
Initial direction set.

- ``basinhopping`` - Basin hopping. This is part of scipy's ``basinhopping``
tools.

niter : integer
The number of basin hopping iterations.
niter_success : integer
@@ -130,13 +145,15 @@ associated with that specific optimizer:
- `args` <- `fargs`
- `jac` <- `score`
- `hess` <- `hess`
- ``minimize`` - Allows the use of any scipy optimizer.
min_method : str, optional
Name of minimization method to use.
Any method specific arguments can be passed directly.
For a list of methods and their arguments, see
documentation of `scipy.optimize.minimize`.
If no method is specified, then BFGS is used.

- ``minimize`` - Allows the use of any scipy optimizer.

min_method : str, optional
Name of minimization method to use.
Any method specific arguments can be passed directly.
For a list of methods and their arguments, see
documentation of `scipy.optimize.minimize`.
If no method is specified, then BFGS is used.

Model Class
-----------
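As a worked illustration of the option routing that optimization.rst documents, here is a minimal sketch (the Poisson model, data, and keyword values are illustrative assumptions, not part of this commit): solver-specific keywords are passed to ``fit()`` and forwarded to the scipy routine behind the selected ``method``.

```python
# Minimal sketch (assumed model and data): solver-specific keywords are passed
# through fit() and forwarded to the scipy routine for the chosen method.
import numpy as np
import statsmodels.api as sm

rng = np.random.RandomState(0)
X = sm.add_constant(rng.normal(size=(500, 2)))
y = rng.poisson(np.exp(X @ [0.5, 0.2, -0.1]))

model = sm.Poisson(y, X)
res_bfgs = model.fit(method="bfgs", gtol=1e-8, maxiter=500)            # bfgs options
res_lbfgs = model.fit(method="lbfgs", m=30, pgtol=1e-10, maxiter=200)  # lbfgs options
print(res_bfgs.params, res_lbfgs.params)
```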
7 changes: 4 additions & 3 deletions statsmodels/base/optimizer.py
@@ -371,10 +371,11 @@ def _fit_bfgs(f, score, start_params, fargs, kwargs, disp=True,
return xopt, retvals


def _fit_lbfgs(f, score, start_params, fargs, kwargs, disp=True,
maxiter=100, callback=None, retall=False,
full_output=True, hess=None):
def _fit_lbfgs(f, score, start_params, fargs, kwargs, disp=True, maxiter=100,
callback=None, retall=False, full_output=True, hess=None):
"""
Fit model using L-BFGS algorithm
Parameters
----------
f : function
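For context, the reflowed signature and docstring presumably follow the standard numpydoc layout, which puts a blank line between the one-line summary and the ``Parameters`` header; a sketch of that layout (remaining parameter descriptions and the body elided):

```python
def _fit_lbfgs(f, score, start_params, fargs, kwargs, disp=True, maxiter=100,
               callback=None, retall=False, full_output=True, hess=None):
    """
    Fit model using L-BFGS algorithm

    Parameters
    ----------
    f : function
    """
```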
14 changes: 7 additions & 7 deletions tools/fold_toc.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import sys
import re
import sys

# Read doc to string
filename = sys.argv[1]
@@ -32,27 +32,27 @@
post_n = re.sub('#', str(i), post)
doc = re.sub(pre, post_n, doc, count=1)

## TOC entries
# TOC entries
pre = '<li class="toctree-l1">'
post = '<li class="liClosed"> '
doc = re.sub(pre, post, doc)
doc = re.sub(pre, post, doc)

# TOC entries 2nd level
pre = '<li class="toctree-l2">'
post = '<li class="liClosed"> '
doc = re.sub(pre, post, doc)
doc = re.sub(pre, post, doc)

# TOC entries 3rd level
pre = '<li class="toctree-l3">'
post = '<li class="liClosed"> '
doc = re.sub(pre, post, doc)
doc = re.sub(pre, post, doc)

# TOC entries 4th level
pre = '<li class="toctree-l4">'
post = '<li class="liClosed"> '
doc = re.sub(pre, post, doc)
doc = re.sub(pre, post, doc)

# Write to file
f = open(filename, 'w')
f = open(filename, 'w', encoding='utf8')
f.write(doc)
f.close()
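
The explicit encoding on the final ``open`` call matters on Windows: without it, Python writes with the locale code page (often cp1252), and non-ASCII characters in the generated HTML can raise a UnicodeEncodeError. A self-contained sketch of the pattern ("index.html" and the single substitution are placeholders; the real script walks several toctree levels):

```python
# Minimal sketch: read and write the page with an explicit encoding so the
# result does not depend on the Windows locale code page.
import re

filename = "index.html"  # placeholder; fold_toc.py takes this from sys.argv
with open(filename, encoding="utf8") as fh:
    doc = fh.read()

# Collapse a TOC level, mirroring what fold_toc.py does per toctree level.
doc = re.sub('<li class="toctree-l1">', '<li class="liClosed"> ', doc)

with open(filename, "w", encoding="utf8") as fh:
    fh.write(doc)
```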
