## Docstring Mining

In [None]:
import ast
import showast
import pandas as pd

### 1. AST-based approach
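- Shallow traversal (module body, then the bodies of top-level classes) $\rightarrow$ collect the `FunctionDef` nodes found there; nested functions and nested classes are not visited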

In [None]:
def get_func_defs(tree):
    """Separate a parsed module's top-level functions from its class methods.

    Only the statements directly in ``tree.body`` are examined: plain
    ``def`` statements are returned as functions, and ``def`` statements
    found directly in the body of a top-level class are returned as methods.
    Nested functions, nested classes and ``async def`` are not collected.

    Parameters
    ----------
    tree : ast.Module
        Parsed module, e.g. the result of ``ast.parse``.

    Returns
    -------
    (list, list)
        ``funcs`` is a list of ``ast.FunctionDef`` nodes; ``methods`` is a
        list of ``(class_name, ast.FunctionDef)`` pairs. Both keep source
        order.
    """
    funcs = []
    methods = []

    for node in tree.body:
        if isinstance(node, ast.FunctionDef):
            funcs.append(node)
        elif isinstance(node, ast.ClassDef):
            # Pair each method with the name of the class that owns it.
            methods.extend(
                (node.name, member)
                for member in node.body
                if isinstance(member, ast.FunctionDef)
            )

    return funcs, methods


def mine(module):
    """Tabulate the functions and methods defined in a module's source file.

    The file at ``module.__file__`` is parsed with ``ast`` and the
    definitions returned by ``get_func_defs`` are turned into one row each.

    Parameters
    ----------
    module : module
        Imported module whose ``__file__`` points at a Python source file.

    Returns
    -------
    pandas.DataFrame
        One row per definition with columns:

        - ``type``: ``'function'`` or ``'method @<ClassName>'``
        - ``name``: name of the function / method
        - ``args``: names of its regular positional parameters
          (``args.args`` only; ``*args``, keyword-only and ``**kwargs``
          parameters are not listed)
        - ``doc``: cleaned docstring, or ``None`` when there is none
    """
    # Read raw bytes and let the parser decode them: this honours a BOM or
    # PEP 263 coding cookie and defaults to UTF-8, instead of depending on
    # the platform's locale encoding.
    with open(module.__file__, 'rb') as src:
        tree = ast.parse(src.read(), filename=module.__file__)

    funcs, methods = get_func_defs(tree)

    rows = []
    for func in funcs:
        args = [a.arg for a in func.args.args]
        rows.append(('function', func.name, args, ast.get_docstring(func)))

    for cls_name, method in methods:
        args = [a.arg for a in method.args.args]
        # Use the method node itself for name and docstring (the previous
        # code reused the loop variable of the function loop here).
        rows.append(
            (f'method @{cls_name}', method.name, args, ast.get_docstring(method))
        )

    return pd.DataFrame(rows, columns=['type', 'name', 'args', 'doc'])

#### Testing

In [None]:
import numpy, re, scipy, string, os

# Module whose source file is mined below; print the path of that file.
module = re
print(module.__file__)

# Keep the raw source text in `defs` for manual inspection (e.g. printing it).
with open(module.__file__, 'rt') as f:
    defs = f.read()
    
# Build the table of definitions; dropna() discards rows containing missing
# values (e.g. definitions without a docstring), then rows are sorted by name.
df = mine(module)
df.dropna().sort_values(by='name')

In [None]:
# `tree` only exists as a local variable inside mine(), so it must be built
# here from the source text that the testing cell read into `defs`.
tree = ast.parse(defs)

# Ad-hoc look at one nested node of the parsed source.
# NOTE(review): the hard-coded indices depend on the exact layout of the
# inspected file — adjust them if this raises IndexError/AttributeError.
tree.body[8].orelse[7].names[0]
# tree.body[0].value
# print(defs)
numpy.core