1818
1919
2020class TreeWalker (object ):
21+ """Walks a tree yielding tokens
22+
23+ Tokens are dicts that all have a ``type`` field specifying the type of the
24+ token.
25+
26+ """
def __init__(self, tree):
    """Set up a walker over ``tree``

    The tree is only stored here (as ``self.tree``); nothing is traversed
    or validated at construction time.

    :arg tree: the tree to walk

    """
    self.tree = tree
2334
def __iter__(self):
    """Iterate over the tokens of the tree

    This base implementation always raises; presumably concrete walkers
    are expected to provide their own ``__iter__``.

    :raises NotImplementedError: always

    """
    raise NotImplementedError()
2637
def error(self, msg):
    """Builds a SerializeError token carrying ``msg``

    :arg msg: the error message, stored under the token's ``data`` key

    :returns: dict with ``type`` set to ``"SerializeError"``

    """
    token = {"type": "SerializeError"}
    token["data"] = msg
    return token
2947
def emptyTag(self, namespace, name, attrs, hasChildren=False):
    """Yields an EmptyTag token, optionally followed by an error token

    This is a generator: it always yields the EmptyTag token first and,
    when ``hasChildren`` is true, a SerializeError token afterwards.

    :arg namespace: the namespace of the token--can be ``None``

    :arg name: the name of the element

    :arg attrs: the attributes of the element as a dict

    :arg hasChildren: when true, a SerializeError token is also yielded
        because a void element must not have children

    :returns: generator of one EmptyTag token and, if ``hasChildren``,
        one SerializeError token

    """
    token = {"type": "EmptyTag",
             "name": name,
             "namespace": namespace,
             "data": attrs}
    yield token
    if not hasChildren:
        return
    yield self.error("Void element has children")
3668
def startTag(self, namespace, name, attrs):
    """Builds a StartTag token for an element

    :arg namespace: the namespace of the token--can be ``None``

    :arg name: the name of the element

    :arg attrs: the attributes of the element as a dict; stored as-is
        under the token's ``data`` key

    :returns: StartTag token

    """
    token = {"type": "StartTag"}
    token["name"] = name
    token["namespace"] = namespace
    token["data"] = attrs
    return token
4285
def endTag(self, namespace, name):
    """Builds an EndTag token for an element

    :arg namespace: the namespace of the token--can be ``None``

    :arg name: the name of the element

    :returns: EndTag token

    """
    token = {"type": "EndTag"}
    token["name"] = name
    token["namespace"] = namespace
    return token
4799
48100 def text (self , data ):
101+ """Generates SpaceCharacters and Characters tokens
102+
103+ Depending on what's in the data, this generates one or more
104+ ``SpaceCharacters`` and ``Characters`` tokens.
105+
106+ For example:
107+
108+ >>> from html5lib.treewalkers.base import TreeWalker
109+ >>> # Give it an empty tree just so it instantiates
110+ >>> walker = TreeWalker([])
111+ >>> list(walker.text(''))
112+ []
113+ >>> list(walker.text(' '))
114+ [{u'data': ' ', u'type': u'SpaceCharacters'}]
115+ >>> list(walker.text(' abc ')) # doctest: +NORMALIZE_WHITESPACE
116+ [{u'data': ' ', u'type': u'SpaceCharacters'},
117+ {u'data': u'abc', u'type': u'Characters'},
118+ {u'data': u' ', u'type': u'SpaceCharacters'}]
119+
120+ :arg data: the text data
121+
122+ :returns: one or more ``SpaceCharacters`` and ``Characters`` tokens
123+
124+ """
49125 data = data
50126 middle = data .lstrip (spaceCharacters )
51127 left = data [:len (data ) - len (middle )]
@@ -60,18 +136,44 @@ def text(self, data):
60136 yield {"type" : "SpaceCharacters" , "data" : right }
61137
def comment(self, data):
    """Builds a Comment token

    :arg data: the comment text, stored under the token's ``data`` key

    :returns: Comment token

    """
    token = {"type": "Comment"}
    token["data"] = data
    return token
64147
def doctype(self, name, publicId=None, systemId=None):
    """Builds a Doctype token

    :arg name: the doctype name, stored under the token's ``name`` key

    :arg publicId: value for the token's ``publicId`` key; defaults to
        ``None``

    :arg systemId: value for the token's ``systemId`` key; defaults to
        ``None``

    :returns: the Doctype token

    """
    token = {"type": "Doctype"}
    token["name"] = name
    token["publicId"] = publicId
    token["systemId"] = systemId
    return token
70164
def entity(self, name):
    """Builds an Entity token

    :arg name: the entity name, stored under the token's ``name`` key

    :returns: an Entity token

    """
    token = {"type": "Entity"}
    token["name"] = name
    return token
73174
def unknown(self, nodeType):
    """Generates an error token for an unknown node type

    :arg nodeType: the unrecognised node type; may be any object (for
        example a string name or an integer node-type code)

    :returns: SerializeError token whose message names ``nodeType``

    """
    # %-formatting instead of ``+`` so a non-string node type (e.g. an
    # integer code) produces the error token rather than a TypeError.
    # The one-element tuple keeps a tuple-valued nodeType from being
    # unpacked as format arguments.
    return self.error("Unknown node type: %s" % (nodeType,))
76178
77179
0 commit comments