From 576b362e9b4fb1f956feb63f69ff70dcce770ab7 Mon Sep 17 00:00:00 2001
From: Kevin Sheppard <kevin.k.sheppard@gmail.com>
Date: Sat, 25 Jul 2020 23:51:32 +0100
Subject: [PATCH] ENH: Add tool to simplify documenting API in release notes

Add tool to walk API and generate a JSON
---
 statsmodels/stats/multivariate.py |  18 +-
 tools/enumerate-api.py            | 284 ++++++++++++++++++++++++++++++
 2 files changed, 294 insertions(+), 8 deletions(-)
 create mode 100644 tools/enumerate-api.py

diff --git a/statsmodels/stats/multivariate.py b/statsmodels/stats/multivariate.py
index c3a19e945ff..97dc27a5628 100644
--- a/statsmodels/stats/multivariate.py
+++ b/statsmodels/stats/multivariate.py
@@ -13,8 +13,10 @@
 from statsmodels.stats.base import HolderTuple
 from statsmodels.tools.validation import array_like
 
+
 # shortcut function
-logdet = lambda x: np.linalg.slogdet(x)[1]  # noqa: E731
+def _logdet(x):
+    return np.linalg.slogdet(x)[1]  # second item is log(|det(x)|)
 
 
 def test_mvmean(data, mean_null=0, return_results=True):
@@ -302,7 +304,7 @@ def test_cov(cov, nobs, cov_null):
 
     fact = nobs - 1.
     fact *= 1 - (2 * k + 1 - 2 / (k + 1)) / (6 * (n - 1) - 1)
-    fact2 = logdet(S0) - logdet(n / (n - 1) * S)
+    fact2 = _logdet(S0) - _logdet(n / (n - 1) * S)
     fact2 += np.trace(n / (n - 1) * np.linalg.solve(S0, S)) - k
     statistic = fact * fact2
     df = k * (k + 1) / 2
@@ -362,7 +364,7 @@ def test_cov_spherical(cov, nobs):
     k = cov.shape[0]
 
     statistic = nobs - 1 - (2 * k**2 + k + 2) / (6 * k)
-    statistic *= k * np.log(np.trace(cov)) - logdet(cov) - k * np.log(k)
+    statistic *= k * np.log(np.trace(cov)) - _logdet(cov) - k * np.log(k)
     df = k * (k + 1) / 2 - 1
     pvalue = stats.chi2.sf(statistic, df)
     return HolderTuple(statistic=statistic,
@@ -412,7 +414,7 @@ def test_cov_diagonal(cov, nobs):
     k = cov.shape[0]
     R = cov2corr(cov)
 
-    statistic = -(nobs - 1 - (2 * k + 5) / 6) * logdet(R)
+    statistic = -(nobs - 1 - (2 * k + 5) / 6) * _logdet(R)
     df = k * (k - 1) / 2
     pvalue = stats.chi2.sf(statistic, df)
     return HolderTuple(statistic=statistic,
@@ -486,12 +488,12 @@ def test_cov_blockdiagonal(cov, nobs, block_len):
     if k != sum(k_blocks):
         msg = "sample covariances and blocks do not have matching shape"
         raise ValueError(msg)
-    logdet_blocks = sum(logdet(c) for c in cov_blocks)
+    logdet_blocks = sum(_logdet(c) for c in cov_blocks)
     a2 = k**2 - sum(ki**2 for ki in k_blocks)
     a3 = k**3 - sum(ki**3 for ki in k_blocks)
 
     statistic = (nobs - 1 - (2 * a3 + 3 * a2) / (6. * a2))
-    statistic *= logdet_blocks - logdet(cov)
+    statistic *= logdet_blocks - _logdet(cov)
 
     df = a2 / 2
     pvalue = stats.chi2.sf(statistic, df)
@@ -556,8 +558,8 @@ def test_cov_oneway(cov_list, nobs_list):
 
     cov_pooled = sum((n - 1) * c for (n, c) in zip(nobs_list, cov_list))
     cov_pooled /= (nobs - m)
-    stat0 = (nobs - m) * logdet(cov_pooled)
-    stat0 -= sum((n - 1) * logdet(c) for (n, c) in zip(nobs_list, cov_list))
+    stat0 = (nobs - m) * _logdet(cov_pooled)
+    stat0 -= sum((n - 1) * _logdet(c) for (n, c) in zip(nobs_list, cov_list))
 
     # Box's chi2
     c1 = sum(1 / (n - 1) for n in nobs_list) - 1 / (nobs - m)
diff --git a/tools/enumerate-api.py b/tools/enumerate-api.py
new file mode 100644
index 00000000000..5f152170aca
--- /dev/null
+++ b/tools/enumerate-api.py
@@ -0,0 +1,284 @@
+from setuptools import find_packages
+
+import argparse
+import importlib
+import inspect
+import json
+import logging
+import os
+from pkgutil import iter_modules
+import sys
+
+FILE_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+def find_modules(path):
+    """
+    Find every package and plain module below a directory.
+
+    Returns the set of dotted names, relative to ``path``, of each package
+    found by ``find_packages`` plus each non-package module directly in it.
+    """
+    modules = set()
+    for pkg in find_packages(path):
+        modules.add(pkg)
+        pkgpath = os.path.join(path, *pkg.split("."))
+        # No pre-3.6 fallback: the f-strings in this file need 3.6+ anyway.
+        for info in iter_modules([pkgpath]):
+            if not info.ispkg:
+                modules.add(pkg + "." + info.name)
+    return modules
+
+
+def update_class(func, funcs, class_name, full_name):
+    """
+    Store the public members of class ``func`` in ``funcs[full_name]``.
+    """
+    logger = logging.getLogger("enumerate-api")
+    kinds = (inspect.isfunction, inspect.isclass, inspect.ismethod)
+    class_api = {}
+    for v2 in dir(func):
+        if v2.startswith("_") and v2 != "__init__":
+            continue
+        method = getattr(func, v2)
+        if isinstance(method, property):
+            module = getattr(method.fget, "__module__", None)
+            if module is None:  # e.g. a property defined without a getter
+                continue
+            name, params = f"{module}.{class_name}.{v2}", ()
+        elif any(kind(method) for kind in kinds):
+            name = f"{method.__module__}.{class_name}.{v2}"
+            try:
+                params = tuple(inspect.signature(method).parameters)
+            except (TypeError, ValueError):  # no introspectable signature
+                params = ()
+        else:
+            continue
+        class_api[name] = params
+        logger.info(name)
+    funcs[full_name] = class_api
+
+
+def walk_modules(path):
+    """
+    Import every non-test, non-sandbox statsmodels module below ``path``.
+
+    Returns a dict with the keys ``"functions"`` (qualified name to
+    parameter names) and ``"classes"`` (qualified name to class members).
+    """
+    logger = logging.getLogger("enumerate-api")
+    api = {"functions": {}, "classes": {}}
+    for mod in find_modules(path):
+        module = f"statsmodels.{mod}"
+        logger.info(module)
+        if (
+            ".sandbox" in module
+            or module.endswith(".tests")
+            or ".tests." in module
+        ):
+            continue
+        try:
+            lib = importlib.import_module(module)
+        except Exception as exc:  # best effort: report and move on
+            logger.warning("Could not import %s: %r", module, exc)
+            continue
+        for v in dir(lib):
+            if v.startswith("_"):
+                continue
+            func = getattr(lib, v)
+            if not (inspect.isfunction(func) or inspect.isclass(func)):
+                continue
+            if "statsmodels" not in (func.__module__ or ""):  # may be None
+                continue
+            name = f"{func.__module__}.{v}"
+            if inspect.isclass(func):
+                update_class(func, api["classes"], v, name)
+            else:
+                try:
+                    params = tuple(inspect.signature(func).parameters)
+                except (TypeError, ValueError):
+                    params = tuple()
+                api["functions"][name] = params
+            logger.info(f"{module}.{v}")
+    return api
+
+
+def generate_diff(api, other, out_file="api-differences.rst"):
+    """
+    Write a reST report of the API changes between two API snapshots.
+
+    Parameters
+    ----------
+    api : dict
+        Current API with the keys ``"classes"`` and ``"functions"``.
+    other : dict
+        API to compare against, e.g. loaded from a JSON file, in which
+        case parameter names are lists rather than tuples.
+    out_file : str
+        Path of the reST file to write.
+
+    Notes
+    -----
+    A signature counts as expanded when it only gained parameters; any
+    removal, rename or reordering is reported as changed.
+    """
+
+    def compare(name, current, old, expanded, changed):
+        # JSON stores parameter names as lists; normalise before comparing
+        current, old = tuple(current), tuple(old)
+        if current == old:
+            return
+        if set(current) > set(old):
+            # Pure additions: list the new names in signature order
+            expanded[name] = [arg for arg in current if arg not in old]
+        else:
+            # Removed, renamed or merely reordered parameters
+            changed[name] = {"current": current, "other": old}
+
+    def header(v, first=False):
+        rule = "=" * len(v)
+        return "\n\n" * (not first) + f"{rule}\n{v}\n{rule}\n"
+
+    def signature(name, args, drop_self=False):
+        args = list(args)
+        if drop_self and args[:1] == ["self"]:
+            # Drop only a leading ``self``, not any name starting with it
+            args = args[1:]
+        return f"``{name.split('.')[-1]}({', '.join(args)})``"
+
+    api_classes = set(api["classes"])
+    other_classes = set(other["classes"])
+    new_classes = api_classes - other_classes
+    removed_classes = other_classes - api_classes
+    new_methods = {}
+    removed_methods = {}
+    changed_methods = {}
+    expanded_methods = {}
+    for key in api_classes & other_classes:
+        current_class = api["classes"][key]
+        other_class = other["classes"][key]
+        for meth in set(current_class).difference(other_class):
+            new_methods[meth] = current_class[meth]
+        for meth in set(other_class).difference(current_class):
+            removed_methods[meth] = tuple(other_class[meth])
+        for meth in set(other_class).intersection(current_class):
+            # Key on the method: keying on the class kept only one changed
+            # method per class and reported the class name instead
+            cur, old = current_class[meth], other_class[meth]
+            compare(meth, cur, old, expanded_methods, changed_methods)
+
+    api_funcs = set(api["functions"])
+    other_funcs = set(other["functions"])
+    new_funcs = api_funcs - other_funcs
+    removed_funcs = other_funcs - api_funcs
+    expanded_funcs = {}
+    changed_funcs = {}
+    for key in api_funcs & other_funcs:
+        cur, old = api["functions"][key], other["functions"][key]
+        compare(key, cur, old, expanded_funcs, changed_funcs)
+
+    with open(out_file, "w") as rst:
+        rst.write(header("New Classes", first=True))
+        for val in sorted(new_classes):
+            rst.write(f"* :class:`{val}`\n")
+        rst.write(header("Removed Classes"))
+        for val in sorted(removed_classes):
+            rst.write(f"* ``{val}``\n")
+
+        # ``:meth:`` is the Sphinx role for methods; ``:method:`` is not one
+        rst.write(header("New Methods"))
+        for val in sorted(new_methods):
+            rst.write(f"* :meth:`{val}`\n")
+
+        rst.write(header("Removed Methods"))
+        for val in sorted(removed_methods):
+            rst.write(f"* ``{val}``\n")
+
+        rst.write(header("Methods with New Arguments"))
+        for val in sorted(expanded_methods):
+            args = ", ".join(f"``{v}``" for v in expanded_methods[val])
+            rst.write(f"* :meth:`{val}`: {args}\n")
+
+        rst.write(header("Methods with Changed Arguments"))
+        for val in sorted(changed_methods):
+            rst.write(f"* :meth:`{val}`\n")
+            for label, which in (("New", "current"), ("Old", "other")):
+                sig = signature(val, changed_methods[val][which], True)
+                rst.write(f"   * {label}: {sig}\n")
+
+        rst.write(header("New Functions"))
+        for val in sorted(new_funcs):
+            rst.write(f"* :func:`{val}`\n")
+        rst.write(header("Removed Functions"))
+        for val in sorted(removed_funcs):
+            rst.write(f"* ``{val}``\n")
+
+        rst.write(header("Functions with New Arguments"))
+        for val in sorted(expanded_funcs):
+            args = ", ".join(f"``{v}``" for v in expanded_funcs[val])
+            rst.write(f"* :func:`{val}`: {args}\n")
+
+        rst.write(header("Functions with Changed Arguments"))
+        for val in sorted(changed_funcs):
+            rst.write(f"* :func:`{val}`\n")
+            for label, which in (("New", "current"), ("Old", "other")):
+                sig = signature(val, changed_funcs[val][which])
+                rst.write(f"   * {label}: {sig}\n")
+
+
+parser = argparse.ArgumentParser(
+    description="Store the current visible API as json"
+)
+parser.add_argument(
+    "--file-path",
+    "-fp",
+    type=str,
+    default=None,
+    help="Path to the root directory. If not provided, assumed to be "
+    "the import location of statsmodels.",
+)
+parser.add_argument(
+    "--out-file",
+    "-of",
+    type=str,
+    default=None,
+    help="Name of output json file. Default is statsmodels-{version}-api.json",
+)
+parser.add_argument(
+    "--diff", "-d", type=str, default=None, help="json file to diff"
+)
+
+
+def main():
+    """Write the visible API to JSON and, with ``--diff``, a change report."""
+    args = parser.parse_args()
+
+    logger = logging.getLogger("enumerate-api")
+    logger.setLevel(logging.INFO)
+    handler = logging.StreamHandler()
+    fmt = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+    handler.setFormatter(logging.Formatter(fmt))
+    logger.addHandler(handler)
+
+    file_path = args.file_path
+    if file_path is None:
+        import statsmodels
+
+        file_path = os.path.dirname(statsmodels.__file__)
+    current_api = walk_modules(file_path)
+    out_file = args.out_file
+    if out_file is None:
+        import statsmodels
+
+        out_file = f"statsmodels-{statsmodels.__version__}-api.json"
+    with open(out_file, "w") as json_out:
+        json.dump(current_api, json_out, indent=2, sort_keys=True)
+    if args.diff is None:
+        return
+    with open(args.diff, "r") as json_in:
+        other_api = json.load(json_in)
+    generate_diff(current_api, other_api)
+
+
+if __name__ == "__main__":
+    main()