Skip to content

Commit

Permalink
Merge pull request #1 from alexmojaki/assert_nodes_equal
Browse files Browse the repository at this point in the history
Use simpler and more robust tree equality checking instead of astroid as_string
  • Loading branch information
alexmojaki committed Oct 20, 2019
2 parents 457b8ca + 4b65fd2 commit 931aa80
Show file tree
Hide file tree
Showing 4 changed files with 111 additions and 94 deletions.
34 changes: 34 additions & 0 deletions tests/test_astroid.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,49 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, print_function

import re

import astroid
from . import tools, test_mark_tokens


class TestAstroid(test_mark_tokens.TestMarkTokens):
    """Re-runs the TestMarkTokens test cases against trees produced by astroid."""

    is_astroid_test = True
    module = astroid

    @classmethod
    def create_mark_checker(cls, source):
        """Build an astroid tree from `source` and wrap it in a tools.MarkChecker."""
        astroid_tree = astroid.builder.AstroidBuilder().string_build(source)
        return tools.MarkChecker(source, tree=astroid_tree)

    def assert_nodes_equal(self, node1, node2):
        """Assert that both nodes have the same normalised repr_tree() text."""
        first_repr = repr_tree(node1)
        second_repr = repr_tree(node2)
        self.assertEqual(first_repr, second_repr)


def repr_tree(node):
    """
    Returns a canonical string representation of an astroid node, with the
    context of each node normalised away.

    The context can change when a substring of the source is parsed on its own:
    e.g. "a" is a Name in the expression "a + 1" but an AssignName in "a = 1".
    Only structure and content matter for comparison, so every context-specific
    spelling is rewritten to its Load equivalent.
    """
    # (pattern, replacement) pairs, applied in order to node.repr_tree().
    normalisations = (
        # astroid represents context in multiple ways:
        # Assign/Del variants of Name and Attribute become plain Name/Attribute...
        (r'(AssignName|DelName)(\(\s*name=)', r'Name\2'),
        (r'(AssignAttr|DelAttr)(\(\s*attrname=)', r'Attribute\2'),
        # ...and explicit Store/Del contexts become Load.
        (r'ctx=<Context\.(Store: 2|Del: 3)>', r'ctx=<Context.Load: 1>'),
        # Works around what appears to be an astroid quirk where spaces before a
        # newline in a docstring are sometimes collapsed.
        (r"' +\\n'", r"'\\n'"),
    )

    text = node.repr_tree()
    for pattern, replacement in normalisations:
        text = re.sub(pattern, replacement, text)
    return text
12 changes: 0 additions & 12 deletions tests/test_asttokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,18 +79,6 @@ def test_token_methods(self):
self.assertEqual(atok.find_token(atok.tokens[5], tokenize.NL), atok.tokens[9])
self.assertTrue(token.ISEOF(atok.find_token(atok.tokens[5], tokenize.NL).type))


def test_to_source(self):
    """Verify that to_source() actually works, with a couple of cases that have caused hiccups."""
    # A call mixing positional, starred, keyword and double-starred arguments.
    call_text = "foo(a, b, *d, c=2, **e)"
    call_stmt = ast.parse(call_text).body[0]
    self.assertEqual(tools.to_source(call_stmt), call_text)

    # A function definition whose body starts with a docstring.
    func_text = 'def foo():\n """xxx"""\n None'
    func_def = ast.parse(func_text).body[0]
    self.assertEqual(tools.to_source(func_def).strip(), func_text)


def test_unicode_offsets(self):
# The ast module provides utf8 offsets, while tokenize uses unicode offsets. Make sure we
# translate correctly.
Expand Down
78 changes: 74 additions & 4 deletions tests/test_mark_tokens.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,30 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, print_function

import ast
import inspect
import io
import os
import re
import sys
import textwrap
import unittest

import astroid
import six
from asttokens import util

from . import tools


class TestMarkTokens(unittest.TestCase):
maxDiff = None

# We use the same test cases to test both nodes produced by the built-in `ast` module, and by
# the `astroid` library. The latter derives TestAstroid class from TestMarkTokens. For checks
# that differ between them, .is_astroid_test allows to distinguish.
is_astroid_test = False
module = ast

@classmethod
def create_mark_checker(cls, source):
Expand Down Expand Up @@ -582,13 +587,78 @@ def test_sys_modules(self):
continue
m = self.create_mark_checker(source)

# Astroid has at least two weird bugs involving metaclasses
if self.is_astroid_test and 'metaclass=' in m.atok.tree.as_string():
continue

m.verify_all_nodes(self)

if six.PY3:
    def test_dict_merge(self):
        """Verify every node of a dict literal that uses ** unpacking (Python 3 only)."""
        checker = self.create_mark_checker("{**{}}")
        checker.verify_all_nodes(self)

def parse_snippet(self, text, node):
    """
    Returns the parsed AST tree for the given text, handling issues with indentation and newlines
    when text is really an extracted part of larger code.

    `node` is the original node that `text` was extracted from; it decides whether the text is
    parsed as an expression, a whole module, or a single statement.
    """
    parse = self.module.parse

    # Indented text is a statement, so it needs an enclosing scope for the indentation to be
    # valid (textwrap.dedent is insufficient because some lines may not be indented, e.g.
    # comments or multiline strings).
    if re.match(r'^[ \t]+\S', text):
        return parse('def dummy():\n' + text).body[0].body[0]

    # For expressions and statements, a dummy statement '_' is put first: a bare string that
    # leads the module would otherwise end up in the doc attribute of an astroid tree and be a
    # pain to extract for comparison.
    if util.is_expr(node):
        # Parenthesized so that expressions spanning several lines stay parsable.
        return parse('_\n(' + text + ')').body[1].value

    if util.is_module(node):
        return parse(text)

    return parse('_\n' + text).body[1]

def test_assert_nodes_equal(self):
    """
    Checks that assert_nodes_equal actually fails when given different nodes
    """
    # Each pair holds two sources whose parsed trees must be reported as different.
    differing_sources = [
        ('a', 'b'),
        ('a*b', 'a+b'),
        ('a*b', 'b*a'),
        ('(a and b) or c', 'a and (b or c)'),
        ('a = 1', 'a = 2'),
        ('a = 1', 'a += 1'),
        ('a *= 1', 'a += 1'),
        ('[a for a in []]', '[a for a in ()]'),
        ("for x in y: pass", "for x in y: fail"),
        ("1", "1.0"),
        ("foo(a, b, *d, c=2, **e)",
         "foo(a, b, *d, c=2.0, **e)"),
        ("foo(a, b, *d, c=2, **e)",
         "foo(a, b, *d, c=2)"),
        ('def foo():\n """xxx"""\n None',
         'def foo():\n """xx"""\n None'),
    ]

    for first_source, second_source in differing_sources:
        first_tree = self.module.parse(first_source)
        second_tree = self.module.parse(second_source)
        with self.assertRaises(AssertionError):
            self.assert_nodes_equal(first_tree, second_tree)

def assert_nodes_equal(self, t1, t2):
    """
    Recursively assert that t1 and t2 have the same structure and contents.

    Each argument may be an ast node, a list or tuple (compared element by element), or any
    other value (compared with assertEqual). Expression contexts are only checked for both
    being contexts, not for which context they are.
    """
    context_base = ast.expr_context
    if isinstance(t1, context_base):
        # Ignore the context of each node which can change when parsing
        # substrings of source code. We just want equal structure and contents.
        self.assertIsInstance(t2, context_base)
        return

    self.assertEqual(type(t1), type(t2))

    if isinstance(t1, ast.AST):
        # Compare nodes through their (field name, value) pairs.
        fields1 = list(ast.iter_fields(t1))
        fields2 = list(ast.iter_fields(t2))
        self.assert_nodes_equal(fields1, fields2)
        return

    if not isinstance(t1, (list, tuple)):
        self.assertEqual(t1, t2)
        return

    self.assertEqual(len(t1), len(t2))
    for left, right in zip(t1, t2):
        self.assert_nodes_equal(left, right)
81 changes: 3 additions & 78 deletions tests/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,75 +17,6 @@ def read_fixture(*path_parts):
with io.open(get_fixture_path(*path_parts), "r", newline="\n") as f:
return f.read()

def _parse_stmt(text):
    """
    Parse `text` and return its first statement node.

    ast.parse produces a module, but here we want to produce a single statement.
    Raises SyntaxError if `text` is not valid code, and IndexError if it contains no statement.
    """
    # The second positional parameter of ast.parse is the filename, so the mode has to be
    # passed by keyword.
    return ast.parse(text, mode='exec').body[0]

def parse_snippet(text, is_expr=False, is_module=False):
    """
    Returns the parsed AST tree for the given text, handling issues with indentation and newlines
    when text is really an extracted part of larger code.

    If `is_expr` is true, the expression node is returned; if `is_module` is true, the whole
    module node is returned; otherwise the first statement is returned. Indented text is always
    treated as a statement, whatever the flags say.
    """
    # If text is indented, it's a statement, and we need to put in a scope for indents to be valid
    # (using textwrap.dedent is insufficient because some lines may not be indented, e.g. comments
    # or multiline strings). If text is an expression but has newlines, we parenthesize it to make
    # it parsable.
    indented = re.match(r'^[ \t]+\S', text)
    if indented:
        return _parse_stmt('def dummy():\n' + text).body[0]
    if is_expr:
        return _parse_stmt('(' + text + ')').value
    if is_module:
        # The second positional parameter of ast.parse is the filename, so the mode has to be
        # passed by keyword.
        return ast.parse(text, mode='exec')
    return _parse_stmt(text)


def to_source(node):
    """
    Convert a node to source code by converting it to an astroid tree first, and using astroid's
    as_string() method.

    Nodes that already have an as_string() method are rendered directly.
    """
    if hasattr(node, 'as_string'):
        return node.as_string()

    rebuilder = astroid.rebuilder.TreeRebuilder(astroid.manager.AstroidManager())
    # We need to make a copy of node that astroid can process.
    converted = create_astroid_ast(node)

    if isinstance(node, ast.Module):
        return rebuilder.visit_module(converted, '', '', '').as_string()

    # Anything besides Module needs to have astroid Module passed in as a parent.
    parent_module = astroid.nodes.Module('', None)
    return rebuilder.visit(converted, parent_module).as_string()

def create_astroid_ast(node):
    """
    Return a copy of `node` built from the node classes of the parser module astroid uses.

    When astroid has no `_ast` attribute, the built-in `ast` module is used instead.
    """
    # A bit of a hack, reaching into astroid, but we need to re-create the tree with the parser
    # module that astroid understands, to be able to use TreeRebuilder on it.
    parser_module = (
        astroid._ast._get_parser_module()  # pylint: disable=no-member,protected-access
        if hasattr(astroid, "_ast")
        else ast
    )
    converter = ConvertAST(parser_module)
    return converter.visit(node)

class ConvertAST(ast.NodeVisitor):
    """Allows converting from ast nodes to typed_ast.ast27 or typed_ast.ast3 nodes.

    The tree is copied node by node: each node is re-created from the class of the same name in
    the target module, with its fields converted recursively and its attributes copied over.
    """

    def __init__(self, ast_module):
        # Module whose node classes are looked up by name to build the converted tree.
        self._ast_module = ast_module

    def visit(self, node):
        """Return a copy of `node` (and its subtree) built from the target module's classes.

        Raises AttributeError if the target module has no class with the node's class name.
        """
        converted_class = getattr(self._ast_module, node.__class__.__name__)
        fields = {}
        for field, old_value in ast.iter_fields(node):
            if isinstance(old_value, list):
                fields[field] = [self.maybe_visit(n) for n in old_value]
            else:
                fields[field] = self.maybe_visit(old_value)
        # Pass the fields to the constructor rather than setting them on an empty node:
        # creating a node without its required fields is deprecated in recent Python versions.
        new_node = converted_class(**fields)
        for attr in getattr(node, '_attributes', ()):
            # Nodes that were not produced by a parser may lack position attributes such as
            # lineno; copy only the attributes that are actually present.
            if hasattr(node, attr):
                setattr(new_node, attr, getattr(node, attr))
        return new_node

    def maybe_visit(self, node):
        """Convert `node` if it is an AST node; return any other value unchanged."""
        return self.visit(node) if isinstance(node, ast.AST) else node

def collect_nodes_preorder(root):
"""Returns a list of all nodes using pre-order traversal (i.e. parent before children)."""
Expand Down Expand Up @@ -157,16 +88,10 @@ def verify_all_nodes(self, test_case):
# but in isolation it's invalid syntax
text = re.sub(r'^(\s*)elif(\W)', r'\1if\2', text, re.MULTILINE)

rebuilt_node = parse_snippet(text, is_expr=util.is_expr(node), is_module=util.is_module(node))

# Now we need to check if the two nodes are equivalent.
left = _yield_fix(to_source(rebuilt_node))
right = _yield_fix(to_source(node))
test_case.assertEqual(left, right)
rebuilt_node = test_case.parse_snippet(text, node)
test_case.assert_nodes_equal(node, rebuilt_node)
tested_nodes += 1

return tested_nodes

# Whether a yield node is parenthesized depends on its context; to make verification easier,
# always parenthesize it.
def _yield_fix(text):
    """Return `text` wrapped in parentheses if it starts with "yield", otherwise unchanged."""
    if text.startswith("yield"):
        return "(" + text + ")"
    return text

0 comments on commit 931aa80

Please sign in to comment.