## Docstrings Mining

In [25]:
import ast
import showast
import pandas as pd

### 1. AST-based approach
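- Scan the module's top level and the bodies of its top-level classes $\rightarrow$ collect their `FunctionDef` nodes (nested functions and nested classes are not visited)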

In [60]:
def get_func_defs(tree):
    """Split a parsed module into top-level functions and class methods.

    Only direct children are inspected: `tree.body` for functions and
    classes, and each of those classes' `body` for methods. Nested
    functions, nested classes and `async def` definitions are not collected.

    Args:
        tree: a node with a `body` list, e.g. the result of `ast.parse`.

    Returns:
        A `(funcs, methods)` tuple where `funcs` is a list of
        `ast.FunctionDef` nodes and `methods` is a list of
        `(class_name, ast.FunctionDef)` pairs, both in source order.
    """
    funcs = []
    methods = []

    for node in tree.body:
        if isinstance(node, ast.FunctionDef):
            funcs.append(node)
        elif isinstance(node, ast.ClassDef):
            # Remember the owning class name alongside each method node.
            for member in node.body:
                if isinstance(member, ast.FunctionDef):
                    methods.append((node.name, member))

    return funcs, methods


def mine(module):
    """Extract names, arguments and docstrings of a module's callables.

    The module's source file (`module.__file__`) is read and parsed with
    `ast`; the module is not executed again. Functions and methods are
    located by `get_func_defs`.

    Args:
        module: an imported module whose `__file__` points at a Python
            source file.

    Returns:
        A `pandas.DataFrame` with columns `type`, `name`, `args`, `doc`:
        `type` is `'function'` or `'method @<ClassName>'`, `args` is the
        list of names in the definition's `args.args`, and `doc` is the
        result of `ast.get_docstring` (`None` when there is no docstring).
    """
    # Distinct name for the file handle so it cannot be confused with the
    # function nodes iterated below.
    with open(module.__file__) as src:
        source = src.read()

    funcs, methods = get_func_defs(ast.parse(source))

    rows = []
    for func in funcs:
        arg_names = [a.arg for a in func.args.args]
        rows.append(('function', func.name, arg_names, ast.get_docstring(func)))

    for cls_name, method in methods:
        arg_names = [a.arg for a in method.args.args]
        # Name and docstring come from the method node itself, not from the
        # variable left over from the functions loop.
        rows.append((f'method @{cls_name}', method.name, arg_names,
                     ast.get_docstring(method)))

    return pd.DataFrame(rows, columns=['type', 'name', 'args', 'doc'])

#### Testing

In [74]:
import numpy, re, scipy, string, os

# Module under inspection; its source file path is printed for reference.
module = re
print(module.__file__)

# Keep the raw source text at notebook level for ad-hoc inspection.
with open(module.__file__, mode='rt') as f:
    defs = f.read()

# Mine the module, then display only documented entries ordered by name.
df = mine(module)
df.dropna().sort_values('name')

/home/alex/anaconda3/lib/python3.7/re.py


Unnamed: 0,type,name,args,doc
8,function,compile,"[pattern, flags]","Compile a regular expression pattern, returnin..."
11,function,escape,[pattern],Escape special characters in a string.
6,function,findall,"[pattern, string, flags]",Return a list of all non-overlapping matches i...
7,function,finditer,"[pattern, string, flags]",Return an iterator over all non-overlapping ma...
1,function,fullmatch,"[pattern, string, flags]","Try to apply the pattern to all of the string,..."
0,function,match,"[pattern, string, flags]",Try to apply the pattern at the start of the s...
9,function,purge,[],Clear the regular expression caches
2,function,search,"[pattern, string, flags]",Scan through string looking for a match to the...
5,function,split,"[pattern, string, maxsplit, flags]",Split the source string by the occurrences of ...
3,function,sub,"[pattern, repl, string, count, flags]",Return the string obtained by replacing the le...


In [None]:
# Scratch cell for poking at a parsed AST by hand.
# NOTE(review): `tree` is not assigned at notebook level in the visible cells
# (it only exists as a local inside `mine`), so the next line presumably relies
# on leftover kernel state — TODO confirm, or build it first with
# `tree = ast.parse(defs)`.
# NOTE(review): the hard-coded indices depend on the exact layout of whichever
# source file was parsed — TODO confirm which module this was meant for.
tree.body[8].orelse[7].names[0]
# tree.body[0].value
# print(defs)
# As the last expression, this is the value the cell displays.
numpy.core