-
-
Notifications
You must be signed in to change notification settings - Fork 4.4k
/
ipy_lookfor.py
234 lines (193 loc) · 6.95 KB
/
ipy_lookfor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
"""
IPython extension: %lookfor command for searching docstrings
"""
# Pauli Virtanen <pav@iki.fi>, 2008.
import collections
import inspect
import pkgutil
import pydoc
import re
import sys
#------------------------------------------------------------------------------
# Lookfor functionality
#------------------------------------------------------------------------------
# Cache for lookfor: {id(module): {name: (docstring, kind, index), ...}...}
# where kind: "func", "class", "module", "object"
# and index: index in breadth-first namespace traversal
# Filled and read by _lookfor_generate_cache; entries are keyed by the
# id() of the module object, not by the module's name.
_lookfor_caches = {}
# regexp whose match indicates that the string may contain a function signature
# (case-insensitive: an identifier followed by parentheses containing "," or "=").
# lookfor uses it to skip a leading signature line when picking the one-line
# summary shown for each result.
_function_signature_re = re.compile(r"[a-z_]+\(.*[,=].*\)", re.I)
def lookfor(what, modules=None, import_modules=True, regenerate=False):
    """
    Search for objects whose documentation contains all given words.

    Shows a summary of matching objects, sorted roughly by relevance.

    Parameters
    ----------
    what : str
        String containing words to look for.
    modules : None, str, module, or iterable of str/module
        Modules whose docstrings to go through.  ``None`` is forwarded to
        `_lookfor_generate_cache`, which falls back to its default module.
        A single module name or module object is treated as a one-element
        list.
    import_modules : bool
        Whether to import sub-modules in packages.
        Will import only modules in __all__
    regenerate : bool
        Re-generate the docstring cache

    Returns
    -------
    None
        The result listing is printed, or shown through the pydoc pager
        when it is longer than ten lines.  Nothing is shown when `what`
        contains no words.
    """
    # Normalise `modules` to something iterable.  Iterating over None would
    # raise TypeError, and iterating over a bare string would try to import
    # each of its characters.
    if modules is None:
        modules = [None]
    elif isinstance(modules, str) or inspect.ismodule(modules):
        modules = [modules]

    # Cache: merge the per-module docstring caches into one lookup table.
    cache = {}
    for module in modules:
        try:
            module_cache = _lookfor_generate_cache(module, import_modules,
                                                   regenerate)
        except ImportError:
            # Best effort: a module that cannot be imported is skipped.
            continue
        cache.update(module_cache)

    # Search
    # XXX: maybe using a real stemming search engine would be better?
    whats = str(what).lower().split()
    if not whats:
        return

    found = []
    for name, (docstring, kind, index) in cache.items():
        if kind in ('module', 'object'):
            # don't show modules or objects
            continue
        doc = docstring.lower()
        if all(w in doc for w in whats):
            found.append(name)

    # Relevance sort
    # XXX: this is full Harrison-Stetson heuristics now,
    # XXX: it probably could be improved
    kind_relevance = {'func': 1000, 'class': 1000,
                      'module': -1000, 'object': -1000}

    def relevance(name, docstr, kind, index):
        r = 0
        # do the keywords occur within the start of the docstring?
        first_doc = "\n".join(docstr.lower().strip().split("\n")[:3])
        r += sum(200 for w in whats if w in first_doc)
        # do the keywords occur in the function name?
        r += sum(30 for w in whats if w in name)
        # is the full name long?
        r += -len(name) * 5
        # is the object of bad type?
        r += kind_relevance.get(kind, -1000)
        # is the object deep in namespace hierarchy?
        r += -name.count('.') * 10
        # `//` keeps the original integer (floor) division on every Python
        r += max(-index // 100, -100)
        return r

    # Most relevant first; ties are broken alphabetically by name.
    found.sort(key=lambda name: (-relevance(name, *cache[name]), name))

    # Pretty-print
    s = "Search results for '%s'" % (' '.join(whats))
    help_text = [s, "-" * len(s)]
    for name in found:
        doc, kind, ix = cache[name]
        doclines = [line.strip() for line in doc.strip().split("\n")
                    if line.strip()]
        # find a suitable short description
        try:
            first_doc = doclines[0].strip()
            if _function_signature_re.search(first_doc):
                # first line looks like a signature; use the next one
                first_doc = doclines[1].strip()
        except IndexError:
            first_doc = ""
        help_text.append("%s\n %s" % (name, first_doc))

    # Output
    if len(help_text) > 10:
        pager = pydoc.getpager()
        pager("\n".join(help_text))
    else:
        print("\n".join(help_text))
def _lookfor_generate_cache(module, import_modules, regenerate):
    """
    Generate docstring cache for given module.

    Parameters
    ----------
    module : str, None, module
        Module for which to generate docstring cache.  ``None`` means
        "numpy".  A string is imported first and may be a dotted name.
    import_modules : bool
        Whether to import sub-modules in packages.
        Will import only modules in __all__
    regenerate : bool
        Re-generate the docstring cache

    Returns
    -------
    cache : dict {obj_full_name: (docstring, kind, index), ...}
        Docstring cache for the module, either cached one (regenerate=False)
        or newly generated.

    Raises
    ------
    ImportError
        If `module` is a name that cannot be imported.
    """
    if module is None:
        module = "numpy"

    if isinstance(module, str):
        # __import__("a.b") returns the top-level package "a", not "a.b";
        # after the import the requested module itself is in sys.modules.
        __import__(module)
        module = sys.modules[module]

    if not regenerate:
        try:
            return _lookfor_caches[id(module)]
        except KeyError:
            pass

    # walk items and collect docstrings (breadth-first over the namespace)
    cache = {}
    seen = set()
    index = 0
    queue = collections.deque([(module.__name__, module)])
    while queue:
        name, item = queue.popleft()
        if id(item) in seen:
            continue
        seen.add(id(item))

        index += 1
        kind = "object"

        if inspect.ismodule(item):
            kind = "module"
            _all = getattr(item, '__all__', None)

            # import sub-packages
            if import_modules and hasattr(item, '__path__'):
                for m in pkgutil.iter_modules(item.__path__):
                    sub_name = m[1]
                    if _all is not None and sub_name not in _all:
                        continue
                    try:
                        __import__("%s.%s" % (name, sub_name))
                    except ImportError:
                        continue

            # when __all__ exists, only the names it lists are visited
            for n, v in inspect.getmembers(item):
                if _all is not None and n not in _all:
                    continue
                queue.append(("%s.%s" % (name, n), v))
        elif inspect.isclass(item):
            kind = "class"
            for n, v in inspect.getmembers(item):
                queue.append(("%s.%s" % (name, n), v))
        elif callable(item):
            kind = "func"

        doc = inspect.getdoc(item)
        if doc is not None:
            cache[name] = (doc, kind, index)

    # Register only after the walk has finished, so an exception part-way
    # through never leaves a half-filled cache to be returned by later calls.
    _lookfor_caches[id(module)] = cache
    return cache
#------------------------------------------------------------------------------
# IPython connectivity
#------------------------------------------------------------------------------
import IPython.ipapi
# Object returned by IPython.ipapi.get(); used at the bottom of this file to
# register the magic functions defined below.
ip = IPython.ipapi.get()
# Modules searched by %lookfor by default; replaced by %lookfor_modules.
_lookfor_modules = ['numpy', 'scipy']
def lookfor_f(self, arg=''):
    r"""
    Search for objects whose documentation contains all given words.
    Shows a summary of matching objects, sorted roughly by relevance.

    Usage
    -----
    %lookfor some words
        Search the default modules (see %lookfor_modules)

    %lookfor +numpy some words
        Search module 'numpy'

    %lookfor_modules numpy scipy
        Set default modules whose docstrings to search

    """
    words = []
    modules = []
    for token in arg.split():
        # A leading '+' marks a module to search rather than a search word.
        if token.startswith('+') and len(token) > 1:
            modules.append(token[1:])
        else:
            words.append(token)
    # Fall back to the default module list when no '+module' was given.
    lookfor(' '.join(words), modules=modules or _lookfor_modules)
def lookfor_modules_f(self, arg=''):
    """
    Show or set the default modules searched by %lookfor.

    Usage
    -----
    %lookfor_modules
        Print the modules currently included in the search

    %lookfor_modules numpy scipy
        Set default modules whose docstrings to search

    """
    global _lookfor_modules
    names = arg.split()
    if names:
        _lookfor_modules = names
    else:
        # Empty or whitespace-only argument: report the list, never clear it.
        print("%s %s" % ("Modules included in %lookfor search:",
                         _lookfor_modules))
# Register the two functions above under the magic names 'lookfor' and
# 'lookfor_modules' (used as %lookfor and %lookfor_modules).
ip.expose_magic('lookfor', lookfor_f)
ip.expose_magic('lookfor_modules', lookfor_modules_f)