Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Comparing changes

Choose two branches to see what's changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
base fork: bukzor/RefactorLib
base: f9a5139b9b
...
head fork: bukzor/RefactorLib
compare: 595a82baec
Checking mergeability… Don't worry, you can still create the pull request.
  • 5 commits
  • 12 files changed
  • 0 commit comments
  • 2 contributors
View
26 refactorlib/cheetah/node.py
@@ -55,8 +55,8 @@ def add_comment(self, comment_text):
def make_comment(self, comment_text):
- comment = CheetahNode('Comment')
- comment_start = CheetahNode('CommentStart')
+ comment = self.makeelement('Comment')
+ comment_start = self.makeelement('CommentStart')
comment_start.text = '##'
comment_start.tail = ' ' + comment_text
@@ -91,25 +91,25 @@ def is_in_context(self, directive_string):
else:
return False
-def call(method, arguments):
+def call(method, arguments, makeelement):
"""
return an lxml node representing a call to a method, with arguments.
`method` is a string
`arguments` is an lxml node
"""
- call = CheetahNode('Placeholder')
+ call = makeelement('Placeholder')
- varstart = CheetahNode('CheetahVarStart')
+ varstart = makeelement('CheetahVarStart')
varstart.text = '$'
call.append(varstart)
- namechunks = CheetahNode('CheetahVarNameChunks')
+ namechunks = makeelement('CheetahVarNameChunks')
- name = CheetahNode('DottedName')
+ name = makeelement('DottedName')
name.text = method
namechunks.append(name)
- argstring = CheetahNode('CallArgsString')
+ argstring = makeelement('CallArgsString')
argstring.text = '('
argstring.append(arguments)
namechunks.append(argstring)
@@ -209,7 +209,7 @@ def remove_self(self):
class CheetahDirective(CheetahNodeBase):
def replace_directive(self, other):
if isinstance(other, basestring):
- var = CheetahNode('CheetahVar')
+ var = self.makeelement('CheetahVar')
try:
directive, var.text = other.split(None, 1)
except ValueError:
@@ -268,7 +268,7 @@ def get_end_directive(self):
# Look at sibling Directives after this node, take first one that is an EndDirective.
return self.xpath_one('./following-sibling::Directive[./EndDirective][1]')
-class CheetahNodeLookup(etree.PythonElementClassLookup):
+class NodeLookup(etree.PythonElementClassLookup):
"""
Specify how to assign Python classes to lxml objects.
see: http://lxml.de/element_classes.html#tree-based-element-class-lookup-in-python
@@ -285,9 +285,7 @@ def lookup(self, document, element):
else:
return CheetahNodeBase
-CHEETAH_PARSER = etree.XMLParser()
-CHEETAH_PARSER.set_element_class_lookup(CheetahNodeLookup())
-
-CheetahNode = CHEETAH_PARSER.makeelement
+node_lookup = NodeLookup()
+del NodeLookup # This is a singleton class.
__all__ = ('CheetahNode',)
View
12 refactorlib/cheetah/parse.py
@@ -176,14 +176,6 @@ def detect_encoding(source):
return None
def parse(cheetah_content, encoding=None):
- # TODO: port/generalize this behavior to python as well
- if encoding is None:
- encoding = detect_encoding(cheetah_content)
- if encoding:
- cheetah_content = unicode(cheetah_content, encoding)
- else:
- # I don't see why encoding=None is different from not specifying the encoding.
- cheetah_content = unicode(cheetah_content)
from Cheetah.Compiler import Compiler
# This is very screwy, but so is cheetah. Apologies.
@@ -201,8 +193,8 @@ def parse(cheetah_content, encoding=None):
dictnode = parser_data_to_dictnode(data, cheetah_content)
from refactorlib.parse import dictnode_to_lxml
- from refactorlib.cheetah.node import CheetahNode
- root = dictnode_to_lxml(dictnode, CheetahNode)
+ from refactorlib.cheetah.node import node_lookup
+ root = dictnode_to_lxml(dictnode, node_lookup, encoding)
return root
def remove_empty(data):
View
5 refactorlib/filetypes.py
@@ -25,6 +25,11 @@ def parser(self):
module = __import__('refactorlib.%s.parse' % self.name, fromlist=[None])
return getattr(module, 'parse')
+ @LazyProperty
+ def encoding_detector(self):
+ module = __import__('refactorlib.%s.parse' % self.name, fromlist=[None])
+ return getattr(module, 'detect_encoding')
+
class FileTypes(object):
"""
View
6 refactorlib/node.py
@@ -98,10 +98,6 @@ def one(mylist):
return mylist[0]
-parser_lookup = etree.ElementDefaultClassLookup(element=RefactorLibNodeBase)
-parser = etree.XMLParser()
-parser.set_element_class_lookup(parser_lookup)
-
-RefactorLibNode = parser.makeelement
+node_lookup = etree.ElementDefaultClassLookup(element=RefactorLibNodeBase)
__all__ = ('RefactorLibNodeBase',)
View
41 refactorlib/parse.py
@@ -2,9 +2,22 @@ def parse(filename, filetype=None, encoding=None):
from filetypes import FILETYPES
filetype = FILETYPES.detect_filetype(filename, filetype)
- return filetype.parser(open(filename).read(), encoding=encoding)
+ source = open(filename).read()
-def dictnode_to_lxml(tree, element_factory=None):
+ # If no encoding was explicitly specified, see if we can parse
+ # it out from the contents of the file.
+ if encoding is None:
+ encoding = filetype.encoding_detector(source)
+
+ if encoding:
+ source = unicode(source, encoding)
+ else:
+ # I don't see why encoding=None is different from not specifying the encoding.
+ source = unicode(source)
+
+ return filetype.parser(source, encoding)
+
+def dictnode_to_lxml(tree, node_lookup=None, encoding=None):
"""
Input: A dictionary-based representation of a node tree.
Output: An lxml representation of the same.
@@ -16,10 +29,13 @@ def dictnode_to_lxml(tree, element_factory=None):
attrs -- A dictionary of any extra attributes.
children -- An ordered list of more node-dictionaries.
"""
- if element_factory:
- Element = element_factory
- else:
- from node import RefactorLibNode as Element
+ if not node_lookup:
+ from node import node_lookup
+
+ from lxml.etree import XMLParser
+ lxml_parser_object = XMLParser(encoding=encoding)
+ lxml_parser_object.set_element_class_lookup(node_lookup)
+ Element = lxml_parser_object.makeelement
root = None
stack = [ (tree,root) ]
@@ -27,15 +43,22 @@ def dictnode_to_lxml(tree, element_factory=None):
while stack:
node, parent = stack.pop()
- lxmlnode = Element(node['name'], attrib=node['attrs'])
- lxmlnode.text = node['text']
- lxmlnode.tail = node['tail']
if parent is None:
+ # We use this roundabout method because the encoding is always set
+ # to 'UTF8' if we use parser.makeelement()
+ lxml_parser_object.feed('<trash></trash>')
+ lxmlnode = lxml_parser_object.close()
+ lxmlnode.tag = node['name']
+ lxmlnode.attrib.update(node['attrs'])
root = lxmlnode
else:
+ lxmlnode = Element(node['name'], attrib=node['attrs'])
parent.append(lxmlnode)
+ lxmlnode.text = node['text']
+ lxmlnode.tail = node['tail']
+
for child in reversed(node['children']):
stack.append((child, lxmlnode))
View
22 refactorlib/python/parse.py
@@ -1,14 +1,28 @@
+# regex taken from inducer/pudb's detect_encoding
+import re
+pythonEncodingDirectiveRE = re.compile("^\s*#.*coding[:=]\s*([-\w.]+)")
-def parse(python_contents, encoding=None):
+def detect_encoding(source):
+ # According to PEP 263, the encoding directive must appear on one of the first two lines of the file.
+ top_lines = source.split('\n', 2)[:2]
+
+ for line in top_lines:
+ encodingMatch = pythonEncodingDirectiveRE.search(line)
+ if encodingMatch:
+ return encodingMatch.group(1)
+
+ # We didn't find anything.
+ return None
+
+def parse(python_contents, encoding):
"""
- Given some python contents, as a string, return the lxml representation.
+ Given some python contents, as a unicode string, return the lxml representation.
"""
- #TODO: implement encoding
lib2to3_python = lib2to3_parse(python_contents)
dictnode_python = lib2to3_to_dictnode(lib2to3_python)
from refactorlib.parse import dictnode_to_lxml
- return dictnode_to_lxml(dictnode_python)
+ return dictnode_to_lxml(dictnode_python, encoding=encoding)
def lib2to3_parse(python_contents):
from lib2to3 import pygram, pytree
View
2  refactorlib/tests/cheetah/continuation.out
@@ -1,2 +0,0 @@
-7[?47h)0[?25lPuDB 2011.3 - ?:help n:next s:step into b:breakpoint o:output t:run to cursor !:python shell    1 from Cheetah.Parser import Parser  Variables:    2   args: tuple    3 DEBUG = True  kwargs: dict    4   name: 'getExpressionParts'    5 class InstrumentedMethod(object):  pudb: <module 'pudb' from '/nail/home/buck/mypy/lib/p   6  def __init__(self, method, parent):   ython2.6/site-packages/pudb/__init__.pyc'>    7  self.method = method  self: InstrumentedMethod    8  self.parent = parent  start_pos: 6    9       10  def __call__(self, *args, **kwargs):      11  # I want the data to be arranged in *call* order      12  start_pos = self.parent.pos()      13  name = self.method.__name__      14       15  if name == 'getExpressionParts':      16  import pudb; pudb.set_trace()  Stack:    17   >> __call__ [InstrumentedMethod] parse.py:18 >  18 mydata = [start_pos, None, name]   getExpression [InstrumentedParser] Parser.py:1210    19  self.parent.data.append(mydata)   __call__ [InstrumentedMethod] parse.py:20    20  result = self.method(*args, **kwargs) # Call the wrapped method.   eatSet [InstrumentedParser] Parser.py:2157    21  mydata[1] = self.parent.pos()   __call__ [InstrumentedMethod] parse.py:20    22    eatDirective [InstrumentedParser] Parser.py:1613    23  return result   __call__ [InstrumentedMethod] parse.py:20    24    parse [InstrumentedParser] Parser.py:1489    25 class AnyString(str):   compile [ModuleCompiler] Compiler.py:1687    26  'Represents "any string".'   parse parse.py:149    27  def startswith(self, other):   parse parse.py:5    28  return True   <module> <string>:1    29  def __eq__(self, other):      30  return True      31   Breakpoints:    32 from collections import defaultdict      33 class AutoDict(defaultdict):      34  "Like defaultdict, but auto-populates for .get() as well."      
35  no_default = []      36  def get(self, key, default=no_default):      37  if default is self.no_default:      38  return self[key]      39  else:      40  return super(AutoDict, self).get(key, default)      41       42 class InstrumentedParser(Parser):      43  dont_care_methods = (      44  'getc', 'getRowCol', 'getRowColLine', 'getLine',    [?1002l[?1000l[?25h[?47l8
-[?25h
View
5 refactorlib/tests/python/parse_data/example2_utf8.py
@@ -0,0 +1,5 @@
+# -*- coding: utf-8 -*-
+
+# Some unicode strings
+x = u'I am a unicode string'
+y = u'I have wæird chàrs'
View
6 refactorlib/tests/python/parse_data/test_matches_known_good_parsing/example2_utf8.xml
@@ -0,0 +1,6 @@
+<file_input><simple_stmt><expr_stmt># -*- coding: utf-8 -*-
+
+# Some unicode strings
+<NAME>x</NAME> <EQUAL>=</EQUAL> <STRING>u'I am a unicode string'</STRING></expr_stmt><NEWLINE>
+</NEWLINE></simple_stmt><simple_stmt><expr_stmt><NAME>y</NAME> <EQUAL>=</EQUAL> <STRING>u'I have wæird chàrs'</STRING></expr_stmt><NEWLINE>
+</NEWLINE></simple_stmt><ENDMARKER></ENDMARKER></file_input>
View
11 refactorlib/tests/python/parse_data/test_matches_known_good_parsing/unicode.xml
@@ -0,0 +1,11 @@
+<?xml version='1.0' encoding='CP850'?>
+<file_input><simple_stmt><print_stmt># vim:encoding=CP850:
+
+# German: fuel oil recoil absorber
+# jqvwxy missing, but all non-ASCII letters in one word
+# See: http://www.cl.cam.ac.uk/~mgk25/ucs/examples/quickbrown.txt
+
+# These should be equivalent:
+<NAME>print</NAME> <STRING>'Heiz”lrckstoáabd„mpfung'</STRING></print_stmt><NEWLINE>
+</NEWLINE></simple_stmt><simple_stmt><print_stmt><NAME>print</NAME><power> <STRING>u'Heiz”lrckstoáabd„mpfung'</STRING><trailer><DOT>.</DOT><NAME>encode</NAME></trailer><trailer><LPAR>(</LPAR><STRING>'cp850'</STRING><RPAR>)</RPAR></trailer></power></print_stmt><NEWLINE>
+</NEWLINE></simple_stmt><ENDMARKER></ENDMARKER></file_input>
View
9 refactorlib/tests/python/parse_data/unicode.py
@@ -0,0 +1,9 @@
+# vim:encoding=CP850:
+
+# German: fuel oil recoil absorber
+# jqvwxy missing, but all non-ASCII letters in one word
+# See: http://www.cl.cam.ac.uk/~mgk25/ucs/examples/quickbrown.txt
+
+# These should be equivalent:
+print 'Heiz”lrckstoáabd„mpfung'
+print u'Heiz”lrckstoáabd„mpfung'.encode('cp850')
View
12 refactorlib/tests/python/parse_test.py
@@ -7,6 +7,18 @@ def test_can_make_round_trip(example):
example = parse(example)
assert text == example.totext()
+@parametrize(get_examples)
+def test_encoding_detection(example):
+ from refactorlib.python.parse import detect_encoding
+ text = open(example).read()
+ example = parse(example)
+ detected_encoding = detect_encoding(text)
+
+ assert (
+ example.encoding == detected_encoding or
+ (example.encoding, detected_encoding) == ('UTF-8', None)
+ )
+
@parametrize(get_output('xml'))
def test_matches_known_good_parsing(example, output):
example = parse(example).tostring()

No commit comments for this range

Something went wrong with that request. Please try again.