Merge pull request #1 from alexmojaki/assert_nodes_equal

Use simpler and more robust tree equality checking instead of astroid as_string
gristlabs · Oct 20, 2019 · 931aa80 · 931aa80
2 parents 457b8ca + 4b65fd2
commit 931aa80
Show file tree

Hide file tree

Showing 4 changed files with 111 additions and 94 deletions.
diff --git a/tests/test_astroid.py b/tests/test_astroid.py
@@ -1,15 +1,49 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals, print_function
+
+import re
+
 import astroid
 from . import tools, test_mark_tokens
 
 
 class TestAstroid(test_mark_tokens.TestMarkTokens):
 
   is_astroid_test = True
+  module = astroid  # parser module whose parse() the inherited tests call via self.module
 
   @classmethod
   def create_mark_checker(cls, source):
     builder = astroid.builder.AstroidBuilder()
     tree = builder.string_build(source)
     return tools.MarkChecker(source, tree=tree)
+
+  def assert_nodes_equal(self, node1, node2):  # equal iff the normalised repr_tree() strings match
+    self.assertEqual(
+      repr_tree(node1),
+      repr_tree(node2),
+    )
+
+
+def repr_tree(node):
+  """
+  Returns the string produced by the astroid node's own repr_tree() method, rewritten with
+  regex substitutions to ignore the context of each node, which can change when parsing
+  substrings of source code.
+
+  E.g. "a" is a Name in expression "a + 1" and is an AssignName in expression "a = 1",
+  but we don't care about this difference when comparing structure and content.
+  """
+  result = node.repr_tree()
+
+  # astroid represents context in multiple ways, so several rewrites are needed:
+  # - Store and Del contexts are converted to Load (third substitution below)
+  # - Assign/Del Name/Attr node names become plain Name/Attribute, i.e. Load (first two)
+  result = re.sub(r'(AssignName|DelName)(\(\s*name=)', r'Name\2', result)
+  result = re.sub(r'(AssignAttr|DelAttr)(\(\s*attrname=)', r'Attribute\2', result)
+  result = re.sub(r'ctx=<Context\.(Store: 2|Del: 3)>', r'ctx=<Context.Load: 1>', result)
+
+  # Reduce a quoted run of spaces + escaped newline to just the escaped newline; astroid may collapse such spaces in docstrings (suspected bug, unconfirmed)
+  result = re.sub(r"' +\\n'", r"'\\n'", result)
+
+  return result
diff --git a/tests/test_asttokens.py b/tests/test_asttokens.py
@@ -79,18 +79,6 @@ def test_token_methods(self):
     self.assertEqual(atok.find_token(atok.tokens[5], tokenize.NL), atok.tokens[9])
     self.assertTrue(token.ISEOF(atok.find_token(atok.tokens[5], tokenize.NL).type))
 
-
-  def test_to_source(self):
-    # Verify that to_source() actually works, with a couple of cases that have caused hiccups.
-    source = "foo(a, b, *d, c=2, **e)"
-    root = ast.parse(source)
-    self.assertEqual(tools.to_source(root.body[0]), source)
-
-    source = 'def foo():\n    """xxx"""\n    None'
-    root = ast.parse(source).body[0]
-    self.assertEqual(tools.to_source(root).strip(), source)
-
-
   def test_unicode_offsets(self):
     # ast modules provides utf8 offsets, while tokenize uses unicode offsets. Make sure we
     # translate correctly.

diff --git a/tests/test_mark_tokens.py b/tests/test_mark_tokens.py
@@ -1,25 +1,30 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals, print_function
 
+import ast
 import inspect
 import io
 import os
+import re
 import sys
 import textwrap
 import unittest
 
 import astroid
 import six
+from asttokens import util
 
 from . import tools
 
 
 class TestMarkTokens(unittest.TestCase):
+  maxDiff = None
 
   # We use the same test cases to test both nodes produced by the built-in `ast` module, and by
   # the `astroid` library. The latter derives TestAstroid class from TestMarkTokens. For checks
   # that differ between them, .is_astroid_test allows to distinguish.
   is_astroid_test = False
+  module = ast
 
   @classmethod
   def create_mark_checker(cls, source):
@@ -582,13 +587,78 @@ def test_sys_modules(self):
         continue
       m = self.create_mark_checker(source)
 
-      # Astroid has at least two weird bugs involving metaclasses
-      if self.is_astroid_test and 'metaclass=' in m.atok.tree.as_string():
-        continue
-
       m.verify_all_nodes(self)
 
   if six.PY3:
     def test_dict_merge(self):
       m = self.create_mark_checker("{**{}}")
       m.verify_all_nodes(self)
+
+  def parse_snippet(self, text, node):
+    """
+    Returns the parsed AST tree for the given text, handling issues with indentation and newlines
+    when text is really an extracted part of larger code. `node` only selects the parsing strategy.
+    """
+    # If text is indented, it's a statement, and we need to put in a scope for indents to be valid
+    # (using textwrap.dedent is insufficient because some lines may not be indented, e.g. comments
+    # or multiline strings). If text is an expression but has newlines, we parenthesize it to make
+    # it parsable.
+    # For expressions and statements, we add a dummy statement '_' before the text, because if it's
+    # just a string contained in an astroid.Const or astroid.Expr, it will end up in the doc
+    # attribute and be a pain to extract for comparison.
+    indented = re.match(r'^[ \t]+\S', text)
+    if indented:
+      return self.module.parse('def dummy():\n' + text).body[0].body[0]  # first statement inside dummy()
+    if util.is_expr(node):
+      return self.module.parse('_\n(' + text + ')').body[1].value  # skip '_', unwrap the expression statement
+    if util.is_module(node):
+      return self.module.parse(text)  # whole module: parsed as-is, no dummy statement
+    return self.module.parse('_\n' + text).body[1]  # skip the dummy '_' statement
+
+  def test_assert_nodes_equal(self):
+    """
+    Checks that assert_nodes_equal actually fails (raises AssertionError) when given different nodes
+    """
+
+    def check(s1, s2):  # parse both sources with self.module and require the comparison to fail
+      n1 = self.module.parse(s1)
+      n2 = self.module.parse(s2)
+      with self.assertRaises(AssertionError):
+        self.assert_nodes_equal(n1, n2)
+
+    check('a', 'b')
+    check('a*b', 'a+b')
+    check('a*b', 'b*a')  # same operator and names, different operand order
+    check('(a and b) or c', 'a and (b or c)')  # same tokens, different grouping
+    check('a = 1', 'a = 2')
+    check('a = 1', 'a += 1')
+    check('a *= 1', 'a += 1')
+    check('[a for a in []]', '[a for a in ()]')
+    check("for x in y: pass", "for x in y: fail")
+    check("1", "1.0")
+    check("foo(a, b, *d, c=2, **e)",
+          "foo(a, b, *d, c=2.0, **e)")
+    check("foo(a, b, *d, c=2, **e)",
+          "foo(a, b, *d, c=2)")
+    check('def foo():\n    """xxx"""\n    None',
+          'def foo():\n    """xx"""\n    None')
+
+  def assert_nodes_equal(self, t1, t2):  # recursive structural comparison; fails via unittest assertions
+    if isinstance(t1, ast.expr_context):
+      # Ignore the context of each node which can change when parsing
+      # substrings of source code. We just want equal structure and contents.
+      self.assertIsInstance(t2, ast.expr_context)
+      return
+
+    self.assertEqual(type(t1), type(t2))
+    if isinstance(t1, (list, tuple)):
+      self.assertEqual(len(t1), len(t2))  # checked first: zip() below would silently stop at the shorter one
+      for vc1, vc2 in zip(t1, t2):
+        self.assert_nodes_equal(vc1, vc2)
+    elif isinstance(t1, ast.AST):
+      self.assert_nodes_equal(  # recurse on the lists built from ast.iter_fields() of each node
+        list(ast.iter_fields(t1)),
+        list(ast.iter_fields(t2)),
+      )
+    else:
+      self.assertEqual(t1, t2)  # anything else is compared directly with ==
diff --git a/tests/tools.py b/tests/tools.py
@@ -17,75 +17,6 @@ def read_fixture(*path_parts):
   with io.open(get_fixture_path(*path_parts), "r", newline="\n") as f:
     return f.read()
 
-def _parse_stmt(text):
-  # ast.parse produces a module, but we here want to produce a single statement.
-  return ast.parse(text, 'exec').body[0]
-
-def parse_snippet(text, is_expr=False, is_module=False):
-  """
-  Returns the parsed AST tree for the given text, handling issues with indentation and newlines
-  when text is really an extracted part of larger code.
-  """
-  # If text is indented, it's a statement, and we need to put in a scope for indents to be valid
-  # (using textwrap.dedent is insufficient because some lines may not indented, e.g. comments or
-  # multiline strings). If text is an expression but has newlines, we parenthesize it to make it
-  # parsable.
-  indented = re.match(r'^[ \t]+\S', text)
-  if indented:
-    return _parse_stmt('def dummy():\n' + text).body[0]
-  if is_expr:
-    return _parse_stmt('(' + text + ')').value
-  if is_module:
-    return ast.parse(text, 'exec')
-  return _parse_stmt(text)
-
-
-def to_source(node):
-  """
-  Convert a node to source code by converting it to an astroid tree first, and using astroid's
-  as_string() method.
-  """
-  if hasattr(node, 'as_string'):
-    return node.as_string()
-
-  builder = astroid.rebuilder.TreeRebuilder(astroid.manager.AstroidManager())
-  # We need to make a copy of node that astroid can process.
-  node_copy = create_astroid_ast(node)
-  if isinstance(node, ast.Module):
-    anode = builder.visit_module(node_copy, '', '', '')
-  else:
-    # Anything besides Module needs to have astroid Module passed in as a parent.
-    amodule = astroid.nodes.Module('', None)
-    anode = builder.visit(node_copy, amodule)
-  return anode.as_string()
-
-def create_astroid_ast(node):
-  if hasattr(astroid, "_ast"):
-    # A bit of a hack, reaching into astroid, but we need to re-create the tree with the parser
-    # module that astroid understands, to be able to use TreeRebuilder on it.
-    parser_module = astroid._ast._get_parser_module()   # pylint: disable=no-member,protected-access
-  else:
-    parser_module = ast
-  return ConvertAST(parser_module).visit(node)
-
-class ConvertAST(ast.NodeVisitor):
-  """Allows converting from ast nodes to typed_ast.ast27 or typed_ast.ast3 nodes."""
-  def __init__(self, ast_module):
-    self._ast_module = ast_module
-
-  def visit(self, node):
-    converted_class = getattr(self._ast_module, node.__class__.__name__)
-    new_node = converted_class()
-    for field, old_value in ast.iter_fields(node):
-      new_value = ([self.maybe_visit(n) for n in old_value] if isinstance(old_value, list) else
-                   self.maybe_visit(old_value))
-      setattr(new_node, field, new_value)
-    for attr in getattr(node, '_attributes', ()):
-      setattr(new_node, attr, getattr(node, attr))
-    return new_node
-
-  def maybe_visit(self, node):
-    return self.visit(node) if isinstance(node, ast.AST) else node
 
 def collect_nodes_preorder(root):
   """Returns a list of all nodes using pre-order traversal (i.e. parent before children)."""
@@ -157,16 +88,10 @@ def verify_all_nodes(self, test_case):
       # but in isolation it's invalid syntax
       text = re.sub(r'^(\s*)elif(\W)', r'\1if\2', text, re.MULTILINE)
 
-      rebuilt_node = parse_snippet(text, is_expr=util.is_expr(node), is_module=util.is_module(node))
-
-      # Now we need to check if the two nodes are equivalent.
-      left = _yield_fix(to_source(rebuilt_node))
-      right = _yield_fix(to_source(node))
-      test_case.assertEqual(left, right)
+      rebuilt_node = test_case.parse_snippet(text, node)
+      test_case.assert_nodes_equal(node, rebuilt_node)
       tested_nodes += 1
 
     return tested_nodes
 
-# Yield nodes are parenthesized depending on context; to ease verifications, parenthesize always.
-def _yield_fix(text):
-  return "(" + text + ")" if text.startswith("yield") else text
+