From 576b362e9b4fb1f956feb63f69ff70dcce770ab7 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Sat, 25 Jul 2020 23:51:32 +0100 Subject: [PATCH] ENH: Add tool to simplify documenting API in release notes Add tool to walk API and generate a JSON --- statsmodels/stats/multivariate.py | 18 +- tools/enumerate-api.py | 284 ++++++++++++++++++++++++++++++ 2 files changed, 294 insertions(+), 8 deletions(-) create mode 100644 tools/enumerate-api.py diff --git a/statsmodels/stats/multivariate.py b/statsmodels/stats/multivariate.py index c3a19e945ff..97dc27a5628 100644 --- a/statsmodels/stats/multivariate.py +++ b/statsmodels/stats/multivariate.py @@ -13,8 +13,10 @@ from statsmodels.stats.base import HolderTuple from statsmodels.tools.validation import array_like + # shortcut function -logdet = lambda x: np.linalg.slogdet(x)[1] # noqa: E731 +def _logdet(x): + return np.linalg.slogdet(x)[1] def test_mvmean(data, mean_null=0, return_results=True): @@ -302,7 +304,7 @@ def test_cov(cov, nobs, cov_null): fact = nobs - 1. 
fact *= 1 - (2 * k + 1 - 2 / (k + 1)) / (6 * (n - 1) - 1) - fact2 = logdet(S0) - logdet(n / (n - 1) * S) + fact2 = _logdet(S0) - _logdet(n / (n - 1) * S) fact2 += np.trace(n / (n - 1) * np.linalg.solve(S0, S)) - k statistic = fact * fact2 df = k * (k + 1) / 2 @@ -362,7 +364,7 @@ def test_cov_spherical(cov, nobs): k = cov.shape[0] statistic = nobs - 1 - (2 * k**2 + k + 2) / (6 * k) - statistic *= k * np.log(np.trace(cov)) - logdet(cov) - k * np.log(k) + statistic *= k * np.log(np.trace(cov)) - _logdet(cov) - k * np.log(k) df = k * (k + 1) / 2 - 1 pvalue = stats.chi2.sf(statistic, df) return HolderTuple(statistic=statistic, @@ -412,7 +414,7 @@ def test_cov_diagonal(cov, nobs): k = cov.shape[0] R = cov2corr(cov) - statistic = -(nobs - 1 - (2 * k + 5) / 6) * logdet(R) + statistic = -(nobs - 1 - (2 * k + 5) / 6) * _logdet(R) df = k * (k - 1) / 2 pvalue = stats.chi2.sf(statistic, df) return HolderTuple(statistic=statistic, @@ -486,12 +488,12 @@ def test_cov_blockdiagonal(cov, nobs, block_len): if k != sum(k_blocks): msg = "sample covariances and blocks do not have matching shape" raise ValueError(msg) - logdet_blocks = sum(logdet(c) for c in cov_blocks) + logdet_blocks = sum(_logdet(c) for c in cov_blocks) a2 = k**2 - sum(ki**2 for ki in k_blocks) a3 = k**3 - sum(ki**3 for ki in k_blocks) statistic = (nobs - 1 - (2 * a3 + 3 * a2) / (6. 
* a2)) - statistic *= logdet_blocks - logdet(cov) + statistic *= logdet_blocks - _logdet(cov) df = a2 / 2 pvalue = stats.chi2.sf(statistic, df) @@ -556,8 +558,8 @@ def test_cov_oneway(cov_list, nobs_list): cov_pooled = sum((n - 1) * c for (n, c) in zip(nobs_list, cov_list)) cov_pooled /= (nobs - m) - stat0 = (nobs - m) * logdet(cov_pooled) - stat0 -= sum((n - 1) * logdet(c) for (n, c) in zip(nobs_list, cov_list)) + stat0 = (nobs - m) * _logdet(cov_pooled) + stat0 -= sum((n - 1) * _logdet(c) for (n, c) in zip(nobs_list, cov_list)) # Box's chi2 c1 = sum(1 / (n - 1) for n in nobs_list) - 1 / (nobs - m) diff --git a/tools/enumerate-api.py b/tools/enumerate-api.py new file mode 100644 index 00000000000..5f152170aca --- /dev/null +++ b/tools/enumerate-api.py @@ -0,0 +1,284 @@ +from setuptools import find_packages + +import argparse +import importlib +import inspect +import json +import logging +import os +from pkgutil import iter_modules +import sys + +FILE_DIR = os.path.dirname(os.path.abspath(__file__)) + + +def find_modules(path): + modules = set() + for pkg in find_packages(path): + modules.add(pkg) + pkgpath = path + "/" + pkg.replace(".", "/") + if sys.version_info.major == 2 or ( + sys.version_info.major == 3 and sys.version_info.minor < 6 + ): + for _, name, ispkg in iter_modules([pkgpath]): + if not ispkg: + modules.add(pkg + "." + name) + else: + for info in iter_modules([pkgpath]): + if not info.ispkg: + modules.add(pkg + "." 
+ info.name) + return modules + + +def update_class(func, funcs, class_name, full_name): + logger = logging.getLogger("enumerate-api") + class_api = {} + for v2 in dir(func): + if v2.startswith("_") and v2 != "__init__": + continue + method = getattr(func, v2) + if not ( + inspect.isfunction(method) + or inspect.isclass(method) + or inspect.ismethod(method) + or isinstance(method, property) + ): + continue + if isinstance(method, property): + try: + name = f"{method.fget.__module__}.{class_name}.{v2}" + class_api[name] = tuple() + except Exception: + name = "" + pass + else: + sig = inspect.signature(method) + name = f"{method.__module__}.{class_name}.{v2}" + class_api[name] = tuple(k for k in sig.parameters.keys()) + logger.info(name) + funcs[full_name] = class_api + + +def walk_modules(path): + logger = logging.getLogger("enumerate-api") + modules = find_modules(path) + api = {"functions": {}, "classes": {}} + for mod in modules: + module = f"statsmodels.{mod}" + logger.info(module) + if ( + ".sandbox" in module + or module.endswith(".tests") + or ".tests." 
in module + ): + continue + lib = None + try: + lib = importlib.import_module(module) + except Exception: + pass + if lib is None: + continue + for v in dir(lib): + if v.startswith("_"): + continue + func = getattr(lib, v) + if not (inspect.isfunction(func) or inspect.isclass(func)): + continue + if "statsmodels" not in func.__module__: + continue + name = f"{func.__module__}.{v}" + try: + if inspect.isfunction(func): + d = api["functions"] + else: + d = api["classes"] + sig = inspect.signature(func) + d[name] = tuple(k for k in sig.parameters.keys()) + except Exception: + d[name] = tuple() + if inspect.isclass(func): + update_class(func, api["classes"], v, name) + logger.info(f"{module}.{v}") + return api + + +def generate_diff(api, other): + api_classes = set(api["classes"].keys()) + other_classes = set(other["classes"].keys()) + new_classes = api_classes.difference(other_classes) + removed_classes = set(other_classes).difference(api_classes) + new_methods = {} + removed_methods = {} + changed_methods = {} + expanded_methods = {} + expanded_funcs = {} + changed_funcs = {} + common = api_classes.intersection(other_classes) + for key in common: + current_class = api["classes"][key] + other_class = other["classes"][key] + new = set(current_class.keys()).difference(other_class.keys()) + for meth in new: + new_methods[meth] = current_class[meth] + removed = set(other_class.keys()).difference(current_class.keys()) + for meth in removed: + removed_methods[meth] = tuple(other_class[meth]) + common_methods = set(other_class.keys()).intersection( + current_class.keys() + ) + for meth in common_methods: + if current_class[meth] != tuple(other_class[meth]): + if set(current_class[meth]).issuperset(other_class[meth]): + expanded_methods[key] = set( + current_class[meth] + ).difference(other_class[meth]) + else: + changed_methods[key] = { + "current": current_class[meth], + "other": tuple(other_class[meth]), + } + + api_funcs = set(api["functions"].keys()) + other_funcs = 
set(other["functions"].keys()) + new_funcs = api_funcs.difference(other_funcs) + removed_funcs = set(other_funcs).difference(api_funcs) + common_funcs = api_funcs.intersection(other_funcs) + for key in common_funcs: + current_func = api["functions"][key] + other_func = other["functions"][key] + if current_func == tuple(other_func): + continue + elif set(current_func).issuperset(other_func): + expanded_funcs[key] = set(current_func).difference(other_func) + else: + changed_funcs[key] = { + "current": current_func, + "other": tuple(other_func), + } + + def header(v, first=False): + return ( + "\n\n" * (not first) + + "=" * len(v) + + f"\n{v}\n" + + "=" * len(v) + + "\n" + ) + + with open("api-differences.rst", "w") as rst: + rst.write(header("New Classes", first=True)) + for val in sorted(new_classes): + rst.write(f"* :class:`{val}`\n") + rst.write(header("Removed Classes")) + for val in sorted(removed_classes): + rst.write(f"* ``{val}``\n") + + rst.write(header("New Methods")) + for val in new_methods: + rst.write(f"* :method:`{val}`\n") + + rst.write(header("Removed Methods")) + for val in removed_methods: + rst.write(f"* ``{val}``\n") + + rst.write(header("Methods with New Arguments")) + for val in expanded_methods: + args = map(lambda v: f"``{v}``", expanded_methods[val]) + rst.write(f"* :method:`{val}`: " + ", ".join(args) + "\n") + + rst.write(header("Methods with Changed Arguments")) + for val in changed_methods: + rst.write(f"* :method:`{val}`\n") + name = val.split(".")[-1] + args = ", ".join(changed_methods[val]["current"]) + if args.startswith("self"): + args = args[4:] + if args.startswith(", "): + args = args[2:] + rst.write(f" * New: ``{name}({args})``\n") + args = ", ".join(changed_methods[val]["other"]) + if args.startswith("self"): + args = args[4:] + if args.startswith(", "): + args = args[2:] + rst.write(f" * Old: ``{name}({args})``\n") + + rst.write(header("New Functions")) + for val in sorted(new_funcs): + rst.write(f"* :func:`{val}`\n") + 
rst.write(header("Removed Functions")) + for val in sorted(removed_funcs): + rst.write(f"* ``{val}``\n") + + rst.write(header("Functions with New Arguments")) + for val in expanded_funcs: + args = map(lambda v: f"``{v}``", expanded_funcs[val]) + rst.write(f"* :func:`{val}`: " + ", ".join(args) + "\n") + + rst.write(header("Functions with Changed Arguments")) + for val in changed_funcs: + rst.write(f"* :func:`{val}`\n") + name = val.split(".")[-1] + args = ", ".join(changed_funcs[val]["current"]) + rst.write(f" * New: ``{name}({args})``\n") + args = ", ".join(changed_funcs[val]["other"]) + rst.write(f" * Old: ``{name}({args})``\n") + + +parser = argparse.ArgumentParser( + description="Store the current visible API as json" +) +parser.add_argument( + "--file-path", + "-fp", + type=str, + default=None, + help="Path to the root directory. If not provided, assumed to be be" + "the import location of statsmodels.", +) +parser.add_argument( + "--out-file", + "-of", + type=str, + default=None, + help="Name of output json file. 
Default is statsmodels-{version}-api.json", +) +parser.add_argument( + "--diff", "-d", type=str, default=None, help="json file to diff" +) + + +def main(): + args = parser.parse_args() + + logger = logging.getLogger("enumerate-api") + logger.setLevel(logging.INFO) + ch = logging.StreamHandler() + formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) + ch.setFormatter(formatter) + logger.addHandler(ch) + + file_path = args.file_path + if file_path is None: + import statsmodels + + file_path = os.path.dirname(statsmodels.__file__) + current_api = walk_modules(file_path) + out_file = args.out_file + if out_file is None: + import statsmodels + + out_file = f"statsmodels-{statsmodels.__version__}-api.json" + with open(out_file, "w") as api: + json.dump(current_api, api, indent=2, sort_keys=True) + if args.diff is not None: + with open(args.diff, "r") as other: + other_api = json.load(other) + generate_diff(current_api, other_api) + + +if __name__ == "__main__": + main()