Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AST splitter #332

Merged
merged 9 commits into from
Apr 7, 2011
38 changes: 17 additions & 21 deletions IPython/core/compilerop.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
-------
* Robert Kern
* Fernando Perez
* Thomas Kluyver
"""

# Note: though it might be more natural to name this module 'compiler', that
Expand Down Expand Up @@ -51,12 +52,12 @@ def code_name(code, number=0):
# Classes and functions
#-----------------------------------------------------------------------------

class CachingCompiler(object):
class CachingCompiler(codeop.Compile):
"""A compiler that caches code compiled from interactive statements.
"""

def __init__(self):
self._compiler = codeop.CommandCompiler()
codeop.Compile.__init__(self)

# This is ugly, but it must be done this way to allow multiple
# simultaneous ipython instances to coexist. Since Python itself
Expand All @@ -81,35 +82,30 @@ def __init__(self):
def compiler_flags(self):
"""Flags currently active in the compilation process.
"""
return self._compiler.compiler.flags
return self.flags

def cache(self, code, number=0):
"""Make a name for a block of code, and cache the code.

def __call__(self, code, symbol, number=0):
"""Compile some code while caching its contents such that the inspect
module can find it later.

Parameters
----------
code : str
Source code to be compiled, one or more lines.

symbol : str
One of 'single', 'exec' or 'eval' (see the builtin ``compile``
documentation for further details on these fields).

number : int, optional
An integer argument identifying the code, useful for informational
purposes in tracebacks (typically it will be the IPython prompt
number).
The Python source code to cache.
number : int
A number which forms part of the code's name. Used for the execution
counter.

Returns
-------
The name of the cached code (as a string). Pass this as the filename
argument to compilation, so that tracebacks are correctly hooked up.
"""
name = code_name(code, number)
code_obj = self._compiler(code, name, symbol)
entry = (len(code), time.time(),
[line+'\n' for line in code.splitlines()], name)
# Cache the info both in the linecache (a global cache used internally
# by most of Python's inspect/traceback machinery), and in our cache
linecache.cache[name] = entry
linecache._ipython_cache[name] = entry
return code_obj
return name

def check_cache(self, *args):
"""Call linecache.checkcache() safely protecting our cached values.
Expand Down
162 changes: 6 additions & 156 deletions IPython/core/inputsplitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,78 +166,6 @@ def get_input_encoding():
# Classes and functions for normal Python syntax handling
#-----------------------------------------------------------------------------

# HACK! This implementation, written by Robert K a while ago using the
# compiler module, is more robust than the other one below, but it expects its
# input to be pure python (no ipython syntax). For now we're using it as a
# second-pass splitter after the first pass transforms the input to pure
# python.

def split_blocks(python):
""" Split multiple lines of code into discrete commands that can be
executed singly.

Parameters
----------
python : str
Pure, exec'able Python code.

Returns
-------
commands : list of str
Separate commands that can be exec'ed independently.
"""
# compiler.parse treats trailing spaces after a newline as a
# SyntaxError. This is different than codeop.CommandCompiler, which
# will compile the trailing spaces just fine. We simply strip any
# trailing whitespace off. Passing a string with trailing whitespace
# to exec will fail however. There seems to be some inconsistency in
# how trailing whitespace is handled, but this seems to work.
python_ori = python # save original in case we bail on error
python = python.strip()

# The compiler module will parse the code into an abstract syntax tree.
# This has a bug with str("a\nb"), but not str("""a\nb""")!!!
try:
code_ast = ast.parse(python)
except:
return [python_ori]

# Uncomment to help debug the ast tree
# for n in code_ast.body:
# print n.lineno,'->',n

# Each separate command is available by iterating over ast.node. The
# lineno attribute is the line number (1-indexed) beginning the commands
# suite.
# lines ending with ";" yield a Discard Node that doesn't have a lineno
# attribute. These nodes can and should be discarded. But there are
# other situations that cause Discard nodes that shouldn't be discarded.
# We might eventually discover other cases where lineno is None and have
# to put in a more sophisticated test.
linenos = [x.lineno-1 for x in code_ast.body if x.lineno is not None]

# When we finally get the slices, we will need to slice all the way to
# the end even though we don't have a line number for it. Fortunately,
# None does the job nicely.
linenos.append(None)

# Same problem at the other end: sometimes the ast tree has its
# first complete statement not starting on line 0. In this case
# we might miss part of it. This fixes ticket 266993. Thanks Gael!
linenos[0] = 0

lines = python.splitlines()

# Create a list of atomic commands.
cmds = []
for i, j in zip(linenos[:-1], linenos[1:]):
cmd = lines[i:j]
if cmd:
cmds.append('\n'.join(cmd)+'\n')

return cmds


class InputSplitter(object):
"""An object that can split Python source input in executable blocks.

Expand Down Expand Up @@ -445,96 +373,18 @@ def push_accepts_more(self):
if not self._full_dedent:
return False
else:
nblocks = len(split_blocks(''.join(self._buffer)))
if nblocks==1:
try:
code_ast = ast.parse(u''.join(self._buffer))
except Exception:
return False
else:
if len(code_ast.body) == 1:
return False

# When input is complete, then termination is marked by an extra blank
# line at the end.
last_line = self.source.splitlines()[-1]
return bool(last_line and not last_line.isspace())

def split_blocks(self, lines):
"""Split a multiline string into multiple input blocks.

Note: this method starts by performing a full reset().

Parameters
----------
lines : str
A possibly multiline string.

Returns
-------
blocks : list
A list of strings, each possibly multiline. Each string corresponds
to a single block that can be compiled in 'single' mode (unless it
has a syntax error)."""

# This code is fairly delicate. If you make any changes here, make
# absolutely sure that you do run the full test suite and ALL tests
# pass.

self.reset()
blocks = []

# Reversed copy so we can use pop() efficiently and consume the input
# as a stack
lines = lines.splitlines()[::-1]
# Outer loop over all input
while lines:
#print 'Current lines:', lines # dbg
# Inner loop to build each block
while True:
# Safety exit from inner loop
if not lines:
break
# Grab next line but don't push it yet
next_line = lines.pop()
# Blank/empty lines are pushed as-is
if not next_line or next_line.isspace():
self.push(next_line)
continue

# Check indentation changes caused by the *next* line
indent_spaces, _full_dedent = self._find_indent(next_line)

# If the next line causes a dedent, it can be for two different
# reasons: either an explicit de-dent by the user or a
# return/raise/pass statement. These MUST be handled
# separately:
#
# 1. the first case is only detected when the actual explicit
# dedent happens, and that would be the *first* line of a *new*
# block. Thus, we must put the line back into the input buffer
# so that it starts a new block on the next pass.
#
# 2. the second case is detected in the line before the actual
# dedent happens, so we consume the line and we can break out
# to start a new block.

# Case 1, explicit dedent causes a break.
# Note: check that we weren't on the very last line, else we'll
# enter an infinite loop adding/removing the last line.
if _full_dedent and lines and not next_line.startswith(' '):
lines.append(next_line)
break

# Otherwise any line is pushed
self.push(next_line)

# Case 2, full dedent with full block ready:
if _full_dedent or \
self.indent_spaces==0 and not self.push_accepts_more():
break
# Form the new block with the current source input
blocks.append(self.source_reset())

#return blocks
# HACK!!! Now that our input is in blocks but guaranteed to be pure
# python syntax, feed it back a second time through the AST-based
# splitter, which is more accurate than ours.
return split_blocks(''.join(blocks))

#------------------------------------------------------------------------
# Private interface
Expand Down
Loading