-
-
Notifications
You must be signed in to change notification settings - Fork 503
/
user_context.py
374 lines (332 loc) · 13.8 KB
/
user_context.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
import re
import os
import keyword
from collections import namedtuple
from jedi import cache
from jedi import common
from jedi.parser import tokenize, ParserWithRecovery
from jedi._compatibility import u
from jedi.parser import token
from jedi.parser.fast import FastParser
from jedi.parser import tree
from jedi import debug
from jedi.common import PushBackIterator
# TODO this should be part of the tokenizer not just of this user_context.
# Lightweight token record: token type, token text, (line, column) start
# position and the prefix text that precedes the token.
Token = namedtuple('Token', ['type', 'string', 'start_pos', 'prefix'])

# Compiled pattern for a single-line string literal with an optional
# b/u and r prefix.  Group 1 captures a single-quote mark, group 2 a
# double-quote mark.  The closing quote may be missing (``$`` alternative),
# so unterminated strings match up to the end of the line as well.
REPLACE_STR = re.compile(
    r"[bBuU]?[rR]?"
    r"(?:(')[^\n'\\]*(?:\\.[^\n'\\]*)*(?:'|$)"
    r"|"
    r'(")[^\n"\\]*(?:\\.[^\n"\\]*)*(?:"|$))'
)
class UserContext(object):
    """
    Backwards-looking view on the source code around the cursor.

    :param source: The source code of the file.
    :param position: The position, the user is currently in, as a
        ``(line, column)`` tuple with a 1-based line number. Only important
        for the main file.
    """
    def __init__(self, source, position):
        self.source = source
        self.position = position
        # Filled lazily by `get_line` with the source split into lines.
        self._line_cache = None
        self._relevant_temp = None

    @cache.underscore_memoization
    def get_path_until_cursor(self):
        """
        Get the path that ends at the cursor.

        As a side effect the start position of that path is stored in
        ``self._start_cursor_pos``.
        """
        path, self._start_cursor_pos = self._calc_path_until_cursor(self.position)
        return path

    def _backwards_line_generator(self, start_pos):
        """
        Yield the lines before ``start_pos`` reversed and in reverse order.

        The first yielded line is cut off at the start column. Every yielded
        line is terminated with a newline. ``self._line_temp`` always holds
        the number of the line that is currently being produced.

        The generator ends once `get_line` reports that no line is left.
        """
        self._line_temp, self._column_temp = start_pos
        # `get_line` signals "no such line" with StopIteration.  It has to be
        # caught here: a StopIteration escaping from a generator body is
        # turned into a RuntimeError on Python 3.7+ (PEP 479).
        try:
            first_line = self.get_line(start_pos[0])[:self._column_temp]
        except StopIteration:
            return

        self._line_length = self._column_temp
        yield first_line[::-1] + '\n'

        while True:
            self._line_temp -= 1
            try:
                line = self.get_line(self._line_temp)
            except StopIteration:
                return
            self._line_length = len(line)
            yield line[::-1] + '\n'

    def _get_backwards_tokenizer(self, start_pos, line_gen=None):
        """
        Tokenize the reversed source and yield `Token` tuples.

        :param start_pos: ``(line, column)`` from where to walk backwards.
        :param line_gen: Optional generator of reversed lines. Defaults to
            `_backwards_line_generator` started at ``start_pos``.

        The token strings and prefixes are reversed again before they are
        yielded, so they read in normal order. Positions are translated
        back to positions in the real (non-reversed) source.
        """
        if line_gen is None:
            line_gen = self._backwards_line_generator(start_pos)
        token_gen = tokenize.generate_tokens(lambda: next(line_gen))
        for typ, tok_str, tok_start_pos, prefix in token_gen:
            # Running out of lines ends the token stream (see PEP 479 for
            # why StopIteration must not leave the generator body).
            try:
                line = self.get_line(self._line_temp)
            except StopIteration:
                return

            # Calculate the real start_pos of the token.
            if tok_start_pos[0] == 1:
                # We are in the first checked line
                column = start_pos[1] - tok_start_pos[1]
            else:
                column = len(line) - tok_start_pos[1]
            # Multi-line docstrings must be accounted for.
            first_line = common.splitlines(tok_str)[0]
            column -= len(first_line)
            # Reverse the token again, so that it is in normal order again.
            yield Token(typ, tok_str[::-1], (self._line_temp, column), prefix[::-1])

    def _calc_path_until_cursor(self, start_pos):
        """
        Something like a reverse tokenizer that tokenizes the reversed strings.

        :return: Tuple of the path string that ends at ``start_pos`` and the
            position where that path starts.
        :raises StopIteration: If the token stream runs out directly after
            a ``-`` token (``next`` is called without a default there).
            `get_reverse_context` relies on that.
        """
        open_brackets = ['(', '[', '{']
        close_brackets = [')', ']', '}']

        start_cursor = start_pos
        gen = PushBackIterator(self._get_backwards_tokenizer(start_pos))
        string = u('')
        level = 0
        force_point = False
        last_type = None
        is_first = True
        for tok_type, tok_str, tok_start_pos, prefix in gen:
            if is_first:
                if prefix:  # whitespace is not a path
                    return u(''), start_cursor
                is_first = False

            if last_type == tok_type == tokenize.NAME:
                string = ' ' + string

            if level:
                # Inside of brackets everything is taken, only the nesting
                # depth is tracked.  Walking backwards a closing bracket
                # opens a level.
                if tok_str in close_brackets:
                    level += 1
                elif tok_str in open_brackets:
                    level -= 1
            elif tok_str == '.':
                force_point = False
            elif force_point:
                # Reversed tokenizing, therefore a number is recognized as a
                # floating point number.
                # The same is true for string prefixes -> represented as a
                # combination of string and name.
                if tok_type == tokenize.NUMBER and tok_str[-1] == '.' \
                        or tok_type == tokenize.NAME and last_type == tokenize.STRING \
                        and tok_str.lower() in ('b', 'u', 'r', 'br', 'ur'):
                    force_point = False
                else:
                    break
            elif tok_str in close_brackets:
                level += 1
            elif tok_type in [tokenize.NAME, tokenize.STRING]:
                if keyword.iskeyword(tok_str) and string:
                    # If there's already something in the string, a keyword
                    # never adds any meaning to the current statement.
                    break
                force_point = True
            elif tok_type == tokenize.NUMBER:
                pass
            else:
                if tok_str == '-':
                    # A minus is only kept if the token that follows it in
                    # the reversed stream is an `e`; that token is pushed
                    # back so the loop handles it normally.
                    next_tok = next(gen)
                    if next_tok[1] == 'e':
                        gen.push_back(next_tok)
                    else:
                        break
                else:
                    break

            start_cursor = tok_start_pos
            string = tok_str + prefix + string
            last_type = tok_type

        # Don't need whitespace around a statement.
        return string.strip(), start_cursor

    def get_path_under_cursor(self):
        """
        Return the path under the cursor. If there is a rest of the path left,
        it will be added to the stuff before it.
        """
        return self.get_path_until_cursor() + self.get_path_after_cursor()

    def get_path_after_cursor(self):
        """
        Return the word characters that directly follow the cursor on the
        cursor line (an empty string if there are none).
        """
        line = self.get_line(self.position[0])
        return re.search(r"[\w\d]*", line[self.position[1]:]).group(0)

    def get_operator_under_cursor(self):
        """
        Return the run of operator characters (neither word characters nor
        whitespace) that surrounds the cursor, in source order.
        """
        line = self.get_line(self.position[0])
        after = re.match(r"[^\w\s]+", line[self.position[1]:])
        # The part before the cursor is matched on the reversed line, because
        # the operator has to end exactly at the cursor.
        before = re.match(r"[^\w\s]+", line[:self.position[1]][::-1])
        # The match on the reversed line is reversed as well and has to be
        # turned around again (`<=` would be returned as `=<` otherwise).
        before_str = before.group(0)[::-1] if before is not None else ''
        after_str = after.group(0) if after is not None else ''
        return before_str + after_str

    def call_signature(self):
        """
        Search backwards from the cursor for the call the cursor is in.

        :return: Tuple of ``(call, index, key_name, start_pos)``: the string
            of the call, the index of the argument the cursor is at, the
            name of the keyword argument the cursor is in (or ``None``) and
            the start position of the call string. ``(None, 0, None,
            (0, 0))`` is returned if no call was found.
        """
        def simplify_str(match):
            """
            To avoid having strings without end marks (error tokens) and
            strings that just screw up all the call signatures, just
            simplify everything.
            """
            mark = match.group(1) or match.group(2)
            return mark + ' ' * (len(match.group(0)) - 2) + mark

        def simplified_lines(pos):
            for line in self._backwards_line_generator(pos):
                # We have to switch the already backwards lines twice, because
                # we scan them from start.
                line = line[::-1]
                modified = REPLACE_STR.sub(simplify_str, line)
                yield modified[::-1]

        index = 0
        level = 0
        next_must_be_name = False
        next_is_key = False
        key_name = None
        generator = self._get_backwards_tokenizer(self.position,
                                                  simplified_lines(self.position))
        for tok_type, tok_str, start_pos, prefix in generator:
            if tok_str in tokenize.ALWAYS_BREAK_TOKENS:
                break
            elif next_must_be_name:
                if tok_type == tokenize.NUMBER:
                    # If there's a number at the end of the string, it will be
                    # tokenized as a number. So add it to the name.
                    # Only the type of the following token is needed;
                    # `tok_str` and `start_pos` deliberately stay those of
                    # the number, which marks the end of the name.
                    following = next(generator, None)
                    if following is None:
                        # Nothing is left in front of the number: no call.
                        break
                    tok_type = following[0]
                if tok_type == tokenize.NAME:
                    end_pos = start_pos[0], start_pos[1] + len(tok_str)
                    call, start_pos = self._calc_path_until_cursor(start_pos=end_pos)
                    return call, index, key_name, start_pos
                index = 0
                next_must_be_name = False
            elif next_is_key:
                if tok_type == tokenize.NAME:
                    key_name = tok_str
                next_is_key = False

            if tok_str == '(':
                level += 1
                if level == 1:
                    # An opening bracket without a matching closing one
                    # (seen backwards): the token in front of it has to be
                    # the name of the call.
                    next_must_be_name = True
                    level = 0
            elif tok_str == ')':
                level -= 1
            elif tok_str == ',':
                index += 1
            elif tok_str == '=':
                next_is_key = True
        return None, 0, None, (0, 0)

    def _skip_whitespace_backwards(self, pos):
        """
        Move ``pos`` backwards over non important white space.

        A position at the start of a line is moved to the previous line if
        that line ends with a backslash (line continuation).

        :raises StopIteration: Passed on from `get_line` if a line does not
            exist.
        """
        line = self.get_line(pos[0])
        while True:
            if pos[1] == 0:
                line = self.get_line(pos[0] - 1)
                if line and line[-1] == '\\':
                    pos = pos[0] - 1, len(line) - 1
                    continue
                else:
                    break

            if line[pos[1] - 1].isspace():
                pos = pos[0], pos[1] - 1
            else:
                break
        return pos

    def get_reverse_context(self, yield_positions=False):
        """
        Returns the token strings in reverse order from the start position.

        :param yield_positions: Yield the start positions of the paths
            instead of the path strings.

        An empty string (or ``None`` with ``yield_positions``) is yielded
        when `_calc_path_until_cursor` runs out of tokens.
        """
        self.get_path_until_cursor()  # In case _start_cursor_pos is undefined.
        pos = self._start_cursor_pos
        while True:
            # A missing line ends the generator.  The StopIteration must be
            # caught explicitly, it would become a RuntimeError on
            # Python 3.7+ otherwise (PEP 479).
            try:
                pos = self._skip_whitespace_backwards(pos)
            except StopIteration:
                return

            try:
                result, pos = self._calc_path_until_cursor(start_pos=pos)
                if yield_positions:
                    yield pos
                else:
                    yield result
            except StopIteration:
                if yield_positions:
                    yield None
                else:
                    yield ''

    def get_backwards_context_tokens(self):
        """
        Yield the tokens in front of the path under the cursor, walking
        backwards token by token until nothing is left.
        """
        self.get_path_until_cursor()  # In case _start_cursor_pos is undefined.
        pos = self._start_cursor_pos
        while True:
            try:
                pos = self._skip_whitespace_backwards(pos)
                token_ = next(self._get_backwards_tokenizer(pos))
            except StopIteration:
                # No line or no token is left: end the iteration.
                return
            pos = token_.start_pos
            yield token_

    def get_line(self, line_nr):
        """
        Return the source line with the (1-based) number ``line_nr``.

        Line ``0`` is an empty string.

        :raises StopIteration: For negative line numbers and for line
            numbers after the last line. Generators that call this method
            have to catch it themselves (PEP 479).
        """
        if self._line_cache is None:
            self._line_cache = common.splitlines(self.source)

        if line_nr == 0:
            # This is a fix for the zeroth line. We need a newline there, for
            # the backwards parser.
            return u('')
        if line_nr < 0:
            raise StopIteration()
        try:
            return self._line_cache[line_nr - 1]
        except IndexError:
            raise StopIteration()

    def get_position_line(self):
        """ Return the part of the cursor line that is before the cursor. """
        return self.get_line(self.position[0])[:self.position[1]]
class UserContextParser(object):
    """
    Parses the source on demand and looks up the statement and the scope
    that belong to the cursor position.
    """
    def __init__(self, grammar, source, path, position, user_context,
                 parser_done_callback, use_fast_parser=True):
        self._grammar = grammar
        self._source = source
        # A falsy path (e.g. `None`) is stored as it is.
        self._path = os.path.abspath(path) if path else path
        self._position = position
        self._user_context = user_context
        self._use_fast_parser = use_fast_parser
        self._parser_done_callback = parser_done_callback

    @cache.underscore_memoization
    def _parser(self):
        """
        Create the parser for the source and hand it to the
        `parser_done_callback` before returning it.
        """
        cache.invalidate_star_import_cache(self._path)
        if not self._use_fast_parser:
            parser = ParserWithRecovery(self._grammar, self._source, self._path)
        else:
            parser = FastParser(self._grammar, self._source, self._path)
            # Don't pickle that module, because the main module is changing quickly
            cache.save_parser(self._path, parser, pickling=False)
        self._parser_done_callback(parser)
        return parser

    @cache.underscore_memoization
    def user_stmt(self):
        """ Returns the statement the module reports for the position. """
        parsed_module = self.module()
        debug.speed('parsed')
        return parsed_module.get_statement_for_position(self._position)

    @cache.underscore_memoization
    def user_stmt_with_whitespace(self):
        """
        Returns the statement under the cursor even if the statement lies
        before the cursor.
        """
        stmt = self.user_stmt()
        if stmt:
            return stmt

        # for statements like `from x import ` (cursor not in statement)
        # or `abs( ` where the cursor is out in the whitespace.
        if self._user_context.get_path_under_cursor():
            # We really should have a user_stmt, but the parser couldn't
            # process it - probably a Syntax Error (or in a comment).
            debug.warning('No statement under the cursor.')
            return None

        # Retry with the first position the reverse context yields.
        positions = self._user_context.get_reverse_context(yield_positions=True)
        pos = next(positions)
        return self.module().get_statement_for_position(pos)

    @cache.underscore_memoization
    def user_scope(self):
        """
        Returns the scope in which the user resides. This includes flows.
        """
        stmt = self.user_stmt()
        if stmt is not None:
            return stmt.get_parent_scope(include_flows=True)

        def innermost(node):
            # Depth-first search for the deepest scope/flow that spans the
            # position; suites and decorated nodes are only passed through.
            for child in node.children:
                if not (child.start_pos <= self._position <= child.end_pos):
                    continue
                if isinstance(child, (tree.Scope, tree.Flow)):
                    return innermost(child) or child
                if child.type in ('suite', 'decorated'):
                    return innermost(child)
            return None

        return innermost(self.module()) or self.module()

    def module(self):
        """ Returns the `module` attribute of the parser. """
        return self._parser().module