Find functions that need a `:ref:` in their docstring

In [4]:
import sys
from pprint import pprint
from time import time
from IPython.display import clear_output
import pickle

# Forget any previously imported copy of the helper module so that the import
# below executes it again (presumably to pick up edits made while the kernel
# is still running).
sys.modules.pop('docstring_refs', None)

from docstring_refs import (
    find_all_py_files,
    extract_functions_docstrings,
    remove_internal_functions,
    remove_referenced_docstrings,
    find_files_using_functions,
    remove_unused_functions,
)

# Root of the package whose source files are scanned.
directory = '../scikit-learn/sklearn'

# Files to process; the cells below iterate over this and call len() on it.
py_files = find_all_py_files(directory)

In [5]:
# Build `functions_info`: for every file in `py_files`, extract the functions
# and their docstrings, then successively filter out internal functions,
# functions whose docstring already has a reference, and functions not used by
# any example file. The survivors are merged into `functions_info`, which is
# pickled to 'functions_info.pkl' after every file.
functions_info = dict()

start_time = time()
for i, file in enumerate(py_files, start=1):

    print(file)

    print('Extracting functions & docstrings...')
    info = extract_functions_docstrings(file)
    prev_len = len(info)
    print(f'{prev_len} functions found')

    print('Removing internal functions...')
    info = remove_internal_functions(info)
    print(f'{prev_len - len(info)} functions removed')
    prev_len = len(info)

    print('Removing functions with reference...')
    info = remove_referenced_docstrings(info)
    print(f'{prev_len - len(info)} functions removed')
    prev_len = len(info)

    print('Finding example files...')
    info = find_files_using_functions(info, '../scikit-learn/examples')

    print('Removing unused functions...')
    info = remove_unused_functions(info)
    print(f'{prev_len - len(info)} functions removed')
    prev_len = len(info)

    functions_info.update(info)

    # Rewrite the pickle on every iteration (presumably as a checkpoint so a
    # partial run is not lost). The handle gets its own name so the loop
    # variable `file` is not rebound to a closed file object.
    with open('functions_info.pkl', 'wb') as pkl_file:
        pickle.dump(functions_info, pkl_file, protocol=pickle.HIGHEST_PROTOCOL)

    pc_complete = i/len(py_files)
    elapsed_time = time() - start_time
    # Linear extrapolation from the average time per file so far.
    # `i` starts at 1, so pc_complete is always > 0 and needs no zero guard.
    remaining_time = (1-pc_complete)*(elapsed_time/pc_complete)

    # wait=True defers clearing until the next output arrives, so the progress
    # line replaces the per-file log without flicker.
    clear_output(wait=True)

    print(f'{pc_complete*100:.1f}% complete | {len(functions_info)} functions | Estimated time remaining: {remaining_time:.0f} seconds')

100.0% complete | 102 functions | Estimated time remaining: 0 seconds


Find functions that have both a `:ref:` and a written-out example in the docstring

In [6]:
# Build `referenced_info`: for every file in `py_files`, extract the functions
# and their docstrings, drop internal functions, keep only those whose
# docstring has a reference AND contains the text "Example", then look up the
# example files using them. The result is merged into `referenced_info`, which
# is pickled to 'referenced_info.pkl' after every file.
referenced_info = dict()

start_time = time()
for i, file in enumerate(py_files, start=1):

    print(file)

    print('Extracting functions & docstrings...')
    info = extract_functions_docstrings(file)
    prev_len = len(info)
    print(f'{prev_len} functions found')

    print('Removing internal functions...')
    info = remove_internal_functions(info)
    print(f'{prev_len - len(info)} functions removed')
    prev_len = len(info)

    print('Removing functions without reference...')
    # remove_referenced_docstrings returns the entries that remain after the
    # referenced ones are removed; deleting those keys from `info` leaves the
    # complement, i.e. only the functions that do have a reference.
    unreferenced = remove_referenced_docstrings(info)
    for key in unreferenced:
        del info[key]
    print(f'{prev_len - len(info)} functions removed')
    prev_len = len(info)

    print('Removing functions without written example...')
    # Collect the keys first: deleting from a dict while iterating over it
    # raises RuntimeError. Non-string docstrings (e.g. a missing docstring)
    # count as having no example.
    keys_to_remove = [
        key for key, value in info.items()
        if not isinstance(value.docstring, str) or "Example" not in value.docstring
    ]
    for key in keys_to_remove:
        del info[key]
    print(f'{prev_len - len(info)} functions removed')
    prev_len = len(info)

    print('Finding example files...')
    info = find_files_using_functions(info, '../scikit-learn/examples')

    referenced_info.update(info)

    # Rewrite the pickle on every iteration (presumably as a checkpoint so a
    # partial run is not lost). The handle gets its own name so the loop
    # variable `file` is not rebound to a closed file object.
    with open('referenced_info.pkl', 'wb') as pkl_file:
        pickle.dump(referenced_info, pkl_file, protocol=pickle.HIGHEST_PROTOCOL)

    pc_complete = i/len(py_files)
    elapsed_time = time() - start_time
    # Linear extrapolation from the average time per file so far.
    # `i` starts at 1, so pc_complete is always > 0 and needs no zero guard.
    remaining_time = (1-pc_complete)*(elapsed_time/pc_complete)

    # wait=True defers clearing until the next output arrives, so the progress
    # line replaces the per-file log without flicker.
    clear_output(wait=True)

    print(f'{pc_complete*100:.1f}% complete | {len(referenced_info)} functions | Estimated time remaining: {remaining_time:.0f} seconds')

100.0% complete | 15 functions | Estimated time remaining: 0 seconds
