In [27]:
import importlib
import inspect

import types
import ast
import re


In [21]:
class DocstringExtractor:
    def __init__(self, module_name):
        self.module_name = module_name
        self.module = importlib.import_module(module_name)

    
    def remove_docstrings(self, source):
        """
        Removes the docstrings from the source code
        """
        pattern = r'^(\s*([\'\"]{3}|#{1,3}))[\s\S]*?\2'
        stripped = re.sub(pattern,'', source, flags=re.M)
        stripped_lines = [l.rstrip() for l in stripped.split("\n") if l.strip()!=""]
        return "\n".join(stripped_lines)






    def extract_docstrings(self):
        docstrings = {}
        for name, obj in self.module.__dict__.items():
            if inspect.isclass(obj):
                for m_name, method in inspect.getmembers(obj, inspect.isfunction):
                    if method.__doc__:
                        if hasattr(method, '__module__'):
                            try:
                                raw_source = inspect.getsource(method)
                                code = self.remove_docstrings(raw_source)
                            except TypeError:
                                raw_source = ""
                                code = ""
                            docstrings[f'{name}.{m_name}'] = (raw_source, code, method.__doc__)
            elif callable(obj) and obj.__doc__:
                if hasattr(obj, '__module__'):
                    try:
                        raw_source = inspect.getsource(obj)
                        code = self.remove_docstrings(raw_source)
                    except TypeError:
                        raw_source = ""
                        code = ""
                    docstrings[name] = (raw_source, code, obj.__doc__)
        return docstrings


In [38]:
extractor = DocstringExtractor("numpy")
docstrings = extractor.extract_docstrings()
i=0
max_i=3
for k in docstrings.keys():
    # for each write Original, Code, Docstring with the name of the module
    # print("original:",k)
    # print(docstrings[k][0])
    print("code:",k)
    print(docstrings[k][1])
    print("docstring:",k)
    print(docstrings[k][2])

code: show_config
def show():
    from numpy.core._multiarray_umath import (
        __cpu_features__, __cpu_baseline__, __cpu_dispatch__
    )
    for name,info_dict in globals().items():
        if name[0] == "_" or type(info_dict) is not type({}): continue
        print(name + ":")
        if not info_dict:
            print("  NOT AVAILABLE")
        for k,v in info_dict.items():
            v = str(v)
            if k == "sources" and len(v) > 200:
                v = v[:60] + " ...\n... " + v[-60:]
            print("    %s = %s" % (k,v))
    features_found, features_not_found = [], []
    for feature in __cpu_dispatch__:
        if __cpu_features__[feature]:
            features_found.append(feature)
        else:
            features_not_found.append(feature)
    print("Supported SIMD extensions in this NumPy install:")
    print("    baseline = %s" % (','.join(__cpu_baseline__)))
    print("    found = %s" % (','.join(features_found)))
    print("    not found = %s" % (','.joi

In [36]:
import test_fishing007
extractor = DocstringExtractor("test_fishing007")
docstrings = extractor.extract_docstrings()
for k in docstrings.keys():
    # for each write Original, Code, Docstring with the name of the module
    print("original:",k)
    print(docstrings[k][0])
    print("code:",k)
    print(docstrings[k][1])
    print("docstring:",k)
    print(docstrings[k][2])

original: test
def test():
    """my test lol
    one two three"""
    return "test"

code: test
def test():
    return "test"
docstring: test
my test lol
    one two three
original: two
def two():
    """ Returns 2 """
    return 2

code: two
def two():
    return 2
docstring: two
 Returns 2 
original: test2
def test2():
    """
    Test one tow
    """
    return None

code: test2
def test2():
    return None
docstring: test2

    Test one tow
    


In [34]:
target_key = 'two'
#Complete method
docstrings['target_key'][0]

'def two():\n    """ Returns 2 """\n    return 2\n'

In [32]:
#only source code

print (docstrings['target_key'][1])

def test():
    return "test"


In [33]:
#only docstrings for functions

print (docstrings['test'][2])

my test lol
    one two three
